2.6 Pratt Parser (prefix)
cedrickchee committed Mar 27, 2020
1 parent c44317d commit 5e371a7
Showing 4 changed files with 406 additions and 2 deletions.
121 changes: 120 additions & 1 deletion ast/ast.go
@@ -3,13 +3,19 @@ package ast
// Package ast implements the Abstract Syntax Tree (AST) that represents the
// parsed source code before it is passed on to the interpreter for evaluation.

import "github.com/cedrickchee/hou/token"
import (
"bytes"

"github.com/cedrickchee/hou/token"
)

// Node defines an interface for all nodes in the AST.
type Node interface {
// Returns the literal value of the token it's associated with.
// This method will be used only for debugging and testing.
TokenLiteral() string
// Returns a stringified version of the AST for debugging.
String() string
}

// Statement defines the interface for all statement nodes.
@@ -47,6 +53,20 @@ func (p *Program) TokenLiteral() string {
}
}

// String returns a stringified version of the AST for debugging.
func (p *Program) String() string {
// Create a buffer and write the return value of each statement's String()
// method to it.
var out bytes.Buffer

for _, s := range p.Statements {
// Delegate most of the work to the Statements of the *ast.Program.
out.WriteString(s.String())
}

return out.String()
}

// LetStatement represents the AST node for a `let` statement, which binds an
// expression to an identifier.
type LetStatement struct {
@@ -62,6 +82,23 @@ func (ls *LetStatement) statementNode() {}
// TokenLiteral prints the literal value of the token associated with this node.
func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal }

// String returns a stringified version of the `let` node.
func (ls *LetStatement) String() string {
var out bytes.Buffer

out.WriteString(ls.TokenLiteral() + " ")
out.WriteString(ls.Name.String())
out.WriteString(" = ")

if ls.Value != nil {
out.WriteString(ls.Value.String())
}

out.WriteString(";")

return out.String()
}

// Identifier is a node that holds the literal value of an identifier
type Identifier struct {
Token token.Token // the token.IDENT token
@@ -75,6 +112,11 @@ func (i *Identifier) expressionNode() {}
// TokenLiteral prints the literal value of the token associated with this node.
func (i *Identifier) TokenLiteral() string { return i.Token.Literal }

// String returns a stringified version of the identifier node.
func (i *Identifier) String() string {
return i.Value
}

// ReturnStatement represents the AST node for a `return` statement, which
// holds the value returned to the outer frame in the call stack.
type ReturnStatement struct {
@@ -86,3 +128,80 @@ func (rs *ReturnStatement) statementNode() {}

// TokenLiteral prints the literal value of the token associated with this node.
func (rs *ReturnStatement) TokenLiteral() string { return rs.Token.Literal }

// String returns a stringified version of the `return` node.
func (rs *ReturnStatement) String() string {
var out bytes.Buffer

out.WriteString(rs.TokenLiteral() + " ")

if rs.ReturnValue != nil {
out.WriteString(rs.ReturnValue.String())
}

out.WriteString(";")

return out.String()
}

// ExpressionStatement represents an expression node.
type ExpressionStatement struct {
Token token.Token // the first token of the expression
Expression Expression
}

func (es *ExpressionStatement) statementNode() {}

// TokenLiteral prints the literal value of the token associated with this node.
func (es *ExpressionStatement) TokenLiteral() string { return es.Token.Literal }

// String returns a stringified version of the expression node.
func (es *ExpressionStatement) String() string {
// The nil-check will be taken out later on, when we can fully build
// expressions.
if es.Expression != nil {
return es.Expression.String()
}
return ""
}

// IntegerLiteral represents a literal integer node.
type IntegerLiteral struct {
Token token.Token
Value int64
}

func (il *IntegerLiteral) expressionNode() {}

// TokenLiteral prints the literal value of the token associated with this node.
func (il *IntegerLiteral) TokenLiteral() string { return il.Token.Literal }

// String returns a stringified version of the expression node.
func (il *IntegerLiteral) String() string { return il.Token.Literal }

// PrefixExpression represents a prefix expression node.
type PrefixExpression struct {
Token token.Token // The prefix token, e.g. !
Operator string
Right Expression
}

func (pe *PrefixExpression) expressionNode() {}

// TokenLiteral prints the literal value of the token associated with this node.
func (pe *PrefixExpression) TokenLiteral() string { return pe.Token.Literal }

// String returns a stringified version of the expression node.
func (pe *PrefixExpression) String() string {
var out bytes.Buffer

// We deliberately add parentheses around the operator and its operand,
// the expression in Right. That allows us to see which operands belong to
// which operator.
out.WriteString("(")
out.WriteString(pe.Operator)
out.WriteString(pe.Right.String())
out.WriteString(")")

return out.String()
}
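Not part of the commit, but as a quick illustration of how the new String() methods compose, a minimal sketch like the one below builds a PrefixExpression by hand and prints its parenthesized form. It only uses types and token constants that appear in this diff (token.BANG, token.IDENT); the snippet is illustrative, not committed code.

package main

import (
	"fmt"

	"github.com/cedrickchee/hou/ast"
	"github.com/cedrickchee/hou/token"
)

func main() {
	// Build `!myVar` by hand: a PrefixExpression whose operand is an Identifier.
	expr := &ast.PrefixExpression{
		Token:    token.Token{Type: token.BANG, Literal: "!"},
		Operator: "!",
		Right: &ast.Identifier{
			Token: token.Token{Type: token.IDENT, Literal: "myVar"},
			Value: "myVar",
		},
	}

	fmt.Println(expr.String()) // prints: (!myVar)
}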
29 changes: 29 additions & 0 deletions ast/ast_test.go
@@ -0,0 +1,29 @@
package ast

import (
"testing"

"github.com/cedrickchee/hou/token"
)

func TestString(t *testing.T) {
program := &Program{
Statements: []Statement{
&LetStatement{
Token: token.Token{Type: token.LET, Literal: "let"},
Name: &Identifier{
Token: token.Token{Type: token.IDENT, Literal: "myVar"},
Value: "myVar",
},
Value: &Identifier{
Token: token.Token{Type: token.IDENT, Literal: "anotherVar"},
Value: "anotherVar",
},
},
},
}

if program.String() != "let myVar = anotherVar;" {
t.Errorf("program.String() wrong. got=%q", program.String())
}
}
129 changes: 128 additions & 1 deletion parser/parser.go
@@ -5,12 +5,40 @@ package parser

import (
"fmt"
"strconv"

"github.com/cedrickchee/hou/ast"
"github.com/cedrickchee/hou/lexer"
"github.com/cedrickchee/hou/token"
)

// Define the operator precedences of the language.
// These constants answer questions like: does the * operator have a higher
// precedence than the == operator? Does a prefix operator have a higher
// precedence than a call expression?
const (
_ int = iota
LOWEST // lowest possible precedence
EQUALS // ==
LESSGREATER // > or <
SUM // +
PRODUCT // *
PREFIX // -X or !X
CALL // myFunction(X)
)
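// With iota, LOWEST is 1 and CALL is 7, so these questions come down to plain
// integer comparisons: PRODUCT > SUM, PREFIX > EQUALS, and so on.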

// The core idea of a Pratt parser is the association of parsing functions with
// token types. Whenever a token of a given type is encountered, the associated
// parsing function is called to parse the appropriate expression and return an
// AST node that represents it. Each token type can have up to two parsing
// functions associated with it, depending on whether the token is found in a
// prefix or an infix position.
type (
prefixParseFn func() ast.Expression
// The function argument is the "left side" of the infix operator that's being
// parsed.
infixParseFn func(ast.Expression) ast.Expression
)
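// Note: this commit only registers prefix functions (see New below); the
// infixParseFn type and the registerInfix helper are declared now so they can
// be used once infix parsing is added.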

// Parser implements the parser.
type Parser struct {
l *lexer.Lexer
@@ -19,6 +47,11 @@ type Parser struct {

curToken token.Token
peekToken token.Token

// Maps used to look up the correct prefixParseFn or infixParseFn for the
// current token type.
prefixParseFns map[token.TokenType]prefixParseFn
infixParseFns map[token.TokenType]infixParseFn
}

// New constructs a new Parser with a Lexer as input.
@@ -28,6 +61,13 @@ func New(l *lexer.Lexer) *Parser {
errors: []string{},
}

// Initialize the prefixParseFns map.
p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
p.registerPrefix(token.IDENT, p.parseIdentifier)
p.registerPrefix(token.INT, p.parseIntegerLiteral)
p.registerPrefix(token.BANG, p.parsePrefixExpression)
p.registerPrefix(token.MINUS, p.parsePrefixExpression)

// Read two tokens, so curToken and peekToken are both set.
p.nextToken()
p.nextToken()
@@ -81,7 +121,7 @@ func (p *Parser) parseStatement() ast.Statement {
case token.RETURN:
return p.parseReturnStatement()
default:
return nil
return p.parseExpressionStatement()
}
}

@@ -126,6 +166,83 @@ func (p *Parser) parseReturnStatement() *ast.ReturnStatement {
return stmt
}

// The top-level method that kicks off expression parsing.
func (p *Parser) parseExpressionStatement() *ast.ExpressionStatement {
stmt := &ast.ExpressionStatement{Token: p.curToken}

stmt.Expression = p.parseExpression(LOWEST)

if p.peekTokenIs(token.SEMICOLON) {
p.nextToken()
}

return stmt
}

// Check whether there's a parsing function associated with p.curToken.Type in
// the prefix position.
func (p *Parser) parseExpression(precedence int) ast.Expression {
prefix := p.prefixParseFns[p.curToken.Type]
if prefix == nil {
// noPrefixParseFnError gives us better error messages when
// program.Statements does not contain a statement but simply a nil.
p.noPrefixParseFnError(p.curToken.Type)
return nil
}

leftExp := prefix()

return leftExp
}
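// Note: the precedence argument is not inspected yet; it becomes relevant
// once infix parsing and precedence comparisons are added.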

func (p *Parser) parseIdentifier() ast.Expression {
// This method doesn't advance the tokens and doesn't call nextToken. That's
// important.
// All of our parsing functions, prefixParseFn or infixParseFn, are going to
// follow this protocol:
// start with curToken being the type of token you’re associated with and
// return with curToken being the last token that’s part of your expression
// type. Never advance the tokens too far.
return &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
}

func (p *Parser) noPrefixParseFnError(t token.TokenType) {
msg := fmt.Sprintf("no prefix parse function for %s found", t)
p.errors = append(p.errors, msg)
}

func (p *Parser) parseIntegerLiteral() ast.Expression {
lit := &ast.IntegerLiteral{Token: p.curToken}
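// Note: base 0 lets strconv.ParseInt below infer the base from the literal's
// prefix (e.g. "0x" for hex), and the bit size of 64 caps the result at an
// int64.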

value, err := strconv.ParseInt(p.curToken.Literal, 0, 64)
if err != nil {
msg := fmt.Sprintf("could not parse %q as integer", p.curToken.Literal)
p.errors = append(p.errors, msg)
return nil
}

lit.Value = value

return lit
}

func (p *Parser) parsePrefixExpression() ast.Expression {
expression := &ast.PrefixExpression{
Token: p.curToken,
Operator: p.curToken.Literal,
}

// Advance the tokens: to correctly parse a prefix expression like `-5`, more
// than one token has to be "consumed".
p.nextToken()

// The precedence passed to parseExpression() changes depending on the
// caller's knowledge and its context.
expression.Right = p.parseExpression(PREFIX)

return expression
}

// "assertion functions".
// Enforce the correctness of the order of tokens by checking the type of the
// next token.
@@ -145,3 +262,13 @@ func (p *Parser) peekTokenIs(t token.TokenType) bool {
func (p *Parser) curTokenIs(t token.TokenType) bool {
return p.curToken.Type == t
}

// Helper method that adds entries to the prefixParseFns map.
func (p *Parser) registerPrefix(tokenType token.TokenType, fn prefixParseFn) {
p.prefixParseFns[tokenType] = fn
}

// Helper method that adds entries to the infixParseFns map.
func (p *Parser) registerInfix(tokenType token.TokenType, fn infixParseFn) {
p.infixParseFns[tokenType] = fn
}
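Again not part of the diff, but a small driver along the following lines should exercise the new prefix-parsing path end to end. It assumes lexer.New and the parser's ParseProgram method from the earlier commits, which are not shown here.

package main

import (
	"fmt"

	"github.com/cedrickchee/hou/lexer"
	"github.com/cedrickchee/hou/parser"
)

func main() {
	// "-5;" is an expression statement: parseExpressionStatement calls
	// parseExpression, which dispatches to parsePrefixExpression for the MINUS
	// token and then to parseIntegerLiteral for its operand.
	l := lexer.New("-5;")
	p := parser.New(l)

	// ParseProgram is assumed from the earlier parser commits; it returns an
	// *ast.Program whose Statements field holds the parsed statements.
	program := p.ParseProgram()

	fmt.Println(program.String()) // expected output: (-5)
}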