Skip to content

Commit

Permalink
2.4 Parser (basic)
Browse files Browse the repository at this point in the history
  • Loading branch information
cedrickchee committed Mar 26, 2020
1 parent 02a5f71 commit b54dc0a
Show file tree
Hide file tree
Showing 3 changed files with 255 additions and 0 deletions.
76 changes: 76 additions & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package ast

// Package ast implements the Abstract Syntax Tree (AST) that represents the
// parsed source code before it is passed on to the interpreter for evaluation.

import "github.com/cedrickchee/hou/token"

// Node is the interface implemented by every node in the AST.
type Node interface {
// TokenLiteral returns the literal value of the token the node is
// associated with. It is intended for debugging and testing only.
TokenLiteral() string
}

// Statement is the interface implemented by every statement node. It embeds
// Node and adds the unexported marker method statementNode.
type Statement interface {
// Every statement is also a Node.
Node
// statementNode is a marker method; it tells statement nodes apart from
// expression nodes, which declare expressionNode instead.
statementNode()
}

// Expression is the interface implemented by every expression node. It embeds
// Node and adds the unexported marker method expressionNode.
type Expression interface {
// Every expression is also a Node.
Node
// expressionNode is a marker method; it tells expression nodes apart from
// statement nodes, which declare statementNode instead.
expressionNode()
}

// =============================================================================
// Implementation of Node
// =============================================================================

// Program is the root node of every AST. Every valid program is a series of
// statements.
type Program struct {
// Statements holds the AST nodes of the program; each one implements the
// Statement interface.
Statements []Statement
}

// TokenLiteral returns the token literal of the program's first statement, or
// the empty string when the program contains no statements.
func (p *Program) TokenLiteral() string {
	if len(p.Statements) == 0 {
		return ""
	}
	return p.Statements[0].TokenLiteral()
}

// LetStatement is the AST node for a `let` statement, which binds the value
// produced by an expression to an identifier.
type LetStatement struct {
Token token.Token // the token.LET token
// Name holds the identifier of the binding.
Name *Identifier
// Value holds the expression that produces the bound value.
Value Expression
}

// statementNode is the marker method that makes LetStatement satisfy the
// Statement interface; it has no behaviour of its own.
func (ls *LetStatement) statementNode() {}

// TokenLiteral returns the literal text of the token stored in the
// statement's Token field (the token.LET token).
func (ls *LetStatement) TokenLiteral() string {
	literal := ls.Token.Literal
	return literal
}

// Identifier is the AST node that holds the literal value of an identifier,
// for example the x in `let x = 5;`.
type Identifier struct {
Token token.Token // the token.IDENT token
Value string // the identifier's text
}

// expressionNode is the marker method that makes Identifier satisfy the
// Expression interface; it has no behaviour of its own. The identifier of a
// binding (the x in `let x = 5;`) is represented by this same node type.
func (i *Identifier) expressionNode() {}

// TokenLiteral returns the literal text of the token stored in the
// identifier's Token field (the token.IDENT token).
func (i *Identifier) TokenLiteral() string {
	literal := i.Token.Literal
	return literal
}
112 changes: 112 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package parser

// Package parser implements the parser that takes as input tokens from the
// lexer and produces as output an AST (Abstract Syntax Tree).

import (
"github.com/cedrickchee/hou/ast"
"github.com/cedrickchee/hou/lexer"
"github.com/cedrickchee/hou/token"
)

// Parser implements the parser. It reads tokens from a lexer and keeps a
// two-token window (current and next) over the token stream.
type Parser struct {
l *lexer.Lexer // the lexer that supplies tokens

curToken token.Token // the token currently being examined
peekToken token.Token // the token that comes after curToken
}

// New returns a Parser that reads its tokens from the lexer l.
func New(l *lexer.Lexer) *Parser {
	parser := &Parser{l: l}

	// Advance twice so that both curToken and peekToken are populated before
	// any parsing starts.
	for primed := 0; primed < 2; primed++ {
		parser.nextToken()
	}

	return parser
}

// nextToken slides the two-token window forward by one: the old peekToken
// becomes curToken and a fresh token is read from the lexer into peekToken.
func (p *Parser) nextToken() {
	p.curToken, p.peekToken = p.peekToken, p.l.NextToken()
}

// ParseProgram runs the parser over the token stream and returns the root
// node of the resulting AST. It is the entry point for the sub-parsers that
// handle the individual kinds of nodes.
func (p *Parser) ParseProgram() *ast.Program {
	// The root node starts with an empty (non-nil) list of statements.
	root := &ast.Program{Statements: []ast.Statement{}}

	// Keep parsing until the current token is token.EOF, moving one token
	// forward after every attempt. Attempts that yield nil are dropped.
	for ; p.curToken.Type != token.EOF; p.nextToken() {
		if parsed := p.parseStatement(); parsed != nil {
			root.Statements = append(root.Statements, parsed)
		}
	}

	return root
}

// parseStatement parses the statement that begins at curToken, choosing the
// sub-parser from the token's type. It returns nil when the token does not
// start a supported statement or when the sub-parser fails.
func (p *Parser) parseStatement() ast.Statement {
	switch p.curToken.Type {
	case token.LET:
		// parseLetStatement returns a *ast.LetStatement. Handing a nil
		// pointer straight back would wrap it in a non-nil ast.Statement, so
		// a caller's `stmt != nil` check would pass and a later method call
		// would dereference nil. Return an untyped nil on failure instead.
		if stmt := p.parseLetStatement(); stmt != nil {
			return stmt
		}
		return nil
	default:
		return nil
	}
}

// parseLetStatement parses a statement of the form `let <identifier> = ...;`
// starting at the token.LET token in curToken. It returns nil when the token
// after `let` is not an identifier or the token after the identifier is not
// an equal sign. The expression to the right of the equal sign is not parsed
// yet, so the Value field of the returned node is left unset.
func (p *Parser) parseLetStatement() *ast.LetStatement {
	// Build the node around the token we are sitting on (token.LET).
	stmt := &ast.LetStatement{Token: p.curToken}

	// The next token must be the identifier being bound; expectPeek advances
	// onto it only when the expectation holds.
	if !p.expectPeek(token.IDENT) {
		return nil
	}

	// curToken is now the token.IDENT token; use it to build the name node.
	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}

	// The identifier must be followed by an equal sign.
	if !p.expectPeek(token.ASSIGN) {
		return nil
	}

	// TODO: We're skipping the expressions until we encounter a semicolon.
	// Also stop at token.EOF: input that lacks the terminating semicolon
	// would otherwise keep this loop spinning forever.
	for !p.curTokenIs(token.SEMICOLON) && !p.curTokenIs(token.EOF) {
		p.nextToken()
	}

	return stmt
}

// "assertion functions".
// Enforce the correctness of the order of tokens by checking the type of the
// next token.
func (p *Parser) expectPeek(t token.TokenType) bool {
	// Expectation not met: leave the token window untouched.
	if !p.peekTokenIs(t) {
		return false
	}

	// Expectation met: step onto the expected token.
	p.nextToken()
	return true
}

// peekTokenIs reports whether the next token (peekToken) has type t.
func (p *Parser) peekTokenIs(t token.TokenType) bool {
	upcoming := p.peekToken.Type
	return upcoming == t
}

// curTokenIs reports whether the current token (curToken) has type t.
func (p *Parser) curTokenIs(t token.TokenType) bool {
	current := p.curToken.Type
	return current == t
}
67 changes: 67 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package parser

import (
"testing"

"github.com/cedrickchee/hou/ast"
"github.com/cedrickchee/hou/lexer"
)

func TestLetStatements(t *testing.T) {
input := `
let x = 5;
let y = 10;
let foobar = 838383;
`
l := lexer.New(input)
p := New(l)

program := p.ParseProgram()
if program == nil {
t.Fatalf("ParseProgram() returned nil")
}
if len(program.Statements) != 3 {
t.Fatalf("program.Statements does not contain 3 statements. got=%d",
len(program.Statements))
}

tests := []struct {
expectedIdentifier string
}{
{"x"},
{"y"},
{"foobar"},
}

for i, tt := range tests {
stmt := program.Statements[i]
if !testLetStatement(t, stmt, tt.expectedIdentifier) {
return
}
}
}

// testLetStatement checks that s is a *ast.LetStatement whose token literal
// is "let" and whose bound identifier is name, both by its Value and by its
// token literal. It records a test error and returns false on the first check
// that fails, and returns true when every check passes.
func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
	// Attribute failures to the calling test's line rather than this helper.
	t.Helper()

	if s.TokenLiteral() != "let" {
		t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
		return false
	}

	letStmt, ok := s.(*ast.LetStatement)
	if !ok {
		t.Errorf("s not *ast.LetStatement. got=%T", s)
		return false
	}

	if letStmt.Name.Value != name {
		t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
		return false
	}

	if letStmt.Name.TokenLiteral() != name {
		// Report the value that was actually compared, not the node pointer.
		t.Errorf("letStmt.Name.TokenLiteral() not '%s'. got=%s",
			name, letStmt.Name.TokenLiteral())
		return false
	}

	return true
}

0 comments on commit b54dc0a

Please sign in to comment.