-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
02a5f71
commit b54dc0a
Showing
3 changed files
with
255 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package ast | ||
|
||
// Package ast implements the Abstract Syntax Tree (AST) that represents the | ||
// parsed source code before it is passed on to the interpreter for evaluation. | ||
|
||
import "github.com/cedrickchee/hou/token" | ||
|
||
// Node is the interface implemented by every node in the AST. | ||
type Node interface { | ||
// TokenLiteral returns the literal value of the token the node is | ||
// associated with. It is meant to be used only for debugging and testing. | ||
TokenLiteral() string | ||
} | ||
|
||
// Statement is the interface implemented by all statement nodes. It embeds | ||
// Node and adds an unexported marker method. | ||
type Statement interface { | ||
// Every statement is also a Node. | ||
Node | ||
// statementNode is a marker method: it takes no arguments and returns | ||
// nothing, and serves to tell statement nodes apart from expression nodes. | ||
statementNode() | ||
} | ||
|
||
// Expression is the interface implemented by all expression nodes. It embeds | ||
// Node and adds an unexported marker method. | ||
type Expression interface { | ||
// Every expression is also a Node. | ||
Node | ||
// expressionNode is a marker method: it takes no arguments and returns | ||
// nothing, and serves to tell expression nodes apart from statement nodes. | ||
expressionNode() | ||
} | ||
|
||
// ============================================================================= | ||
// Implementation of Node | ||
// ============================================================================= | ||
|
||
// Program is the root node of every AST. Every valid program is a series of | ||
// statements. | ||
type Program struct { | ||
// Statements holds the program's top-level nodes, each of which implements | ||
// the Statement interface. | ||
Statements []Statement | ||
} | ||
|
||
// TokenLiteral prints the literal value of the token associated with this node. | ||
func (p *Program) TokenLiteral() string { | ||
if len(p.Statements) > 0 { | ||
return p.Statements[0].TokenLiteral() | ||
} else { | ||
return "" | ||
} | ||
} | ||
|
||
// LetStatement is the AST node for a `let` statement, which binds the value | ||
// of an expression to an identifier. | ||
type LetStatement struct { | ||
Token token.Token // the token.LET token | ||
// Name holds the identifier being bound; Value holds the expression that | ||
// produces the bound value. | ||
Name *Identifier | ||
Value Expression | ||
} | ||
|
||
// statementNode is an empty marker method; together with TokenLiteral it | ||
// makes *LetStatement satisfy the Statement interface. | ||
func (ls *LetStatement) statementNode() {} | ||
|
||
// TokenLiteral prints the literal value of the token associated with this node. | ||
func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal } | ||
|
||
// Identifier is the AST node for an identifier, such as the x in `let x = 5;`. | ||
type Identifier struct { | ||
Token token.Token // the token.IDENT token | ||
// Value holds the identifier's name. | ||
Value string | ||
} | ||
|
||
// expressionNode is an empty marker method; together with TokenLiteral it | ||
// makes *Identifier satisfy the Expression interface. An identifier such as | ||
// the x in `let x = 5;` is therefore represented as an expression node. | ||
func (i *Identifier) expressionNode() {} | ||
|
||
// TokenLiteral prints the literal value of the token associated with this node. | ||
func (i *Identifier) TokenLiteral() string { return i.Token.Literal } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
package parser | ||
|
||
// Package parser implements the parser that takes as input tokens from the | ||
// lexer and produces as output an AST (Abstract Syntax Tree). | ||
|
||
import ( | ||
"github.com/cedrickchee/hou/ast" | ||
"github.com/cedrickchee/hou/lexer" | ||
"github.com/cedrickchee/hou/token" | ||
) | ||
|
||
// Parser reads tokens from a lexer and builds the AST from them. | ||
type Parser struct { | ||
// l is the lexer the parser pulls its tokens from. | ||
l *lexer.Lexer | ||
|
||
// curToken is the token currently under examination; peekToken is the | ||
// token that comes right after it. | ||
curToken token.Token | ||
peekToken token.Token | ||
} | ||
|
||
// New constructs a new Parser with a Lexer as input. | ||
func New(l *lexer.Lexer) *Parser { | ||
p := &Parser{l: l} | ||
|
||
// Read two tokens, so curToken and peekToken are both set. | ||
p.nextToken() | ||
p.nextToken() | ||
|
||
return p | ||
} | ||
|
||
// Helper method that advances both curToken and peekToken. | ||
func (p *Parser) nextToken() { | ||
p.curToken = p.peekToken | ||
p.peekToken = p.l.NextToken() | ||
} | ||
|
||
// ParseProgram starts the parsing process and is the entry point for all other | ||
// sub-parsers that are responsible for other nodes in the AST. | ||
func (p *Parser) ParseProgram() *ast.Program { | ||
// Construct the root node of the AST. | ||
program := &ast.Program{} | ||
program.Statements = []ast.Statement{} | ||
|
||
// Iterate over every token in the input until it encounters an token.EOF | ||
// token. | ||
for p.curToken.Type != token.EOF { | ||
stmt := p.parseStatement() | ||
if stmt != nil { | ||
program.Statements = append(program.Statements, stmt) | ||
} | ||
p.nextToken() | ||
} | ||
return program | ||
} | ||
|
||
// Parse a statement. | ||
func (p *Parser) parseStatement() ast.Statement { | ||
switch p.curToken.Type { | ||
case token.LET: | ||
return p.parseLetStatement() | ||
default: | ||
return nil | ||
} | ||
} | ||
|
||
func (p *Parser) parseLetStatement() *ast.LetStatement { | ||
// Constructs an *ast.LetStatement node with the token it’s currently | ||
// sitting on (a token.LET token). | ||
stmt := &ast.LetStatement{Token: p.curToken} | ||
|
||
// Advances the tokens while making assertions about the next token. | ||
if !p.expectPeek(token.IDENT) { | ||
return nil | ||
} | ||
|
||
// Use token.IDENT token to construct an *ast.Identifier node. | ||
stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal} | ||
|
||
// Expects an equal sign and jumps over the expression following the | ||
// equal sign. | ||
if !p.expectPeek(token.ASSIGN) { | ||
return nil | ||
} | ||
|
||
// TODO: We're skipping the expressions until we | ||
// encounter a semicolon | ||
for !p.curTokenIs(token.SEMICOLON) { | ||
p.nextToken() | ||
} | ||
|
||
return stmt | ||
} | ||
|
||
// "assertion functions". | ||
// Enforce the correctness of the order of tokens by checking the type of the | ||
// next token. | ||
func (p *Parser) expectPeek(t token.TokenType) bool { | ||
if p.peekTokenIs(t) { | ||
p.nextToken() | ||
return true | ||
} else { | ||
return false | ||
} | ||
} | ||
|
||
// peekTokenIs reports whether the upcoming token (peekToken) has type t. | ||
func (p *Parser) peekTokenIs(t token.TokenType) bool { | ||
return p.peekToken.Type == t | ||
} | ||
|
||
// curTokenIs reports whether the current token (curToken) has type t. | ||
func (p *Parser) curTokenIs(t token.TokenType) bool { | ||
return p.curToken.Type == t | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package parser | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/cedrickchee/hou/ast" | ||
"github.com/cedrickchee/hou/lexer" | ||
) | ||
|
||
func TestLetStatements(t *testing.T) { | ||
input := ` | ||
let x = 5; | ||
let y = 10; | ||
let foobar = 838383; | ||
` | ||
l := lexer.New(input) | ||
p := New(l) | ||
|
||
program := p.ParseProgram() | ||
if program == nil { | ||
t.Fatalf("ParseProgram() returned nil") | ||
} | ||
if len(program.Statements) != 3 { | ||
t.Fatalf("program.Statements does not contain 3 statements. got=%d", | ||
len(program.Statements)) | ||
} | ||
|
||
tests := []struct { | ||
expectedIdentifier string | ||
}{ | ||
{"x"}, | ||
{"y"}, | ||
{"foobar"}, | ||
} | ||
|
||
for i, tt := range tests { | ||
stmt := program.Statements[i] | ||
if !testLetStatement(t, stmt, tt.expectedIdentifier) { | ||
return | ||
} | ||
} | ||
} | ||
|
||
func testLetStatement(t *testing.T, s ast.Statement, name string) bool { | ||
if s.TokenLiteral() != "let" { | ||
t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral()) | ||
return false | ||
} | ||
|
||
letStmt, ok := s.(*ast.LetStatement) | ||
if !ok { | ||
t.Errorf("s not *ast.LetStatement. got=%T", s) | ||
return false | ||
} | ||
|
||
if letStmt.Name.Value != name { | ||
t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value) | ||
return false | ||
} | ||
|
||
if letStmt.Name.TokenLiteral() != name { | ||
t.Errorf("s.Name not '%s'. got=%s", name, letStmt.Name) | ||
return false | ||
} | ||
|
||
return true | ||
} |