diff --git a/ast/ast.go b/ast/ast.go
new file mode 100644
index 0000000..3f830c1
--- /dev/null
+++ b/ast/ast.go
@@ -0,0 +1,76 @@
+package ast
+
+// Package ast implements the Abstract Syntax Tree (AST) that represents the
+// parsed source code before it is passed on to the interpreter for evaluation.
+
+import "github.com/cedrickchee/hou/token"
+
+// Node defines an interface for all nodes in the AST.
+type Node interface {
+	// Returns the literal value of the token it's associated with.
+	// This method will be used only for debugging and testing.
+	TokenLiteral() string
+}
+
+// Statement defines the interface for all statement nodes.
+type Statement interface {
+	// Some of these nodes implement the Statement interface.
+	Node
+	statementNode()
+}
+
+// Expression defines the interface for all expression nodes.
+type Expression interface {
+	// Some of these nodes implement the Expression interface.
+	Node
+	expressionNode()
+}
+
+// =============================================================================
+// Implementation of Node
+// =============================================================================
+
+// Program is the root node of every AST. Every valid program is a series of
+// statements.
+type Program struct {
+	// A program consists of a slice of AST nodes that implement the Statement
+	// interface.
+	Statements []Statement
+}
+
+// TokenLiteral returns the literal value of the token associated with this node.
+func (p *Program) TokenLiteral() string {
+	if len(p.Statements) > 0 {
+		return p.Statements[0].TokenLiteral()
+	} else {
+		return ""
+	}
+}
+
+// LetStatement is the AST node that represents a `let` statement, which binds
+// an expression to an identifier.
+type LetStatement struct {
+	Token token.Token // the token.LET token
+	// Name holds the identifier of the binding and Value the expression
+	// that produces the value.
+	Name  *Identifier
+	Value Expression
+}
+
+func (ls *LetStatement) statementNode() {}
+
+// TokenLiteral returns the literal value of the token associated with this node.
+func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal }
+
+// Identifier is a node that holds the literal value of an identifier.
+type Identifier struct {
+	Token token.Token // the token.IDENT token
+	Value string
+}
+
+// To hold the identifier of the binding, the x in `let x = 5;`, we have the
+// Identifier struct type, which implements the Expression interface.
+func (i *Identifier) expressionNode() {}
+
+// TokenLiteral returns the literal value of the token associated with this node.
+func (i *Identifier) TokenLiteral() string { return i.Token.Literal }
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..626c2d8
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,112 @@
+package parser
+
+// Package parser implements the parser that takes as input tokens from the
+// lexer and produces as output an AST (Abstract Syntax Tree).
+
+import (
+	"github.com/cedrickchee/hou/ast"
+	"github.com/cedrickchee/hou/lexer"
+	"github.com/cedrickchee/hou/token"
+)
+
+// Parser parses the token stream produced by the lexer into an AST.
+type Parser struct {
+	l *lexer.Lexer
+
+	curToken  token.Token
+	peekToken token.Token
+}
+
+// New constructs a new Parser with a Lexer as input.
+func New(l *lexer.Lexer) *Parser {
+	p := &Parser{l: l}
+
+	// Read two tokens, so curToken and peekToken are both set.
+	p.nextToken()
+	p.nextToken()
+
+	return p
+}
+
+// nextToken is a helper method that advances both curToken and peekToken.
+func (p *Parser) nextToken() {
+	p.curToken = p.peekToken
+	p.peekToken = p.l.NextToken()
+}
+
+// ParseProgram starts the parsing process and is the entry point for all other
+// sub-parsers that are responsible for other nodes in the AST.
+func (p *Parser) ParseProgram() *ast.Program {
+	// Construct the root node of the AST.
+	program := &ast.Program{}
+	program.Statements = []ast.Statement{}
+
+	// Iterate over every token in the input until it encounters a token.EOF
+	// token.
+	for p.curToken.Type != token.EOF {
+		stmt := p.parseStatement()
+		if stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+		p.nextToken()
+	}
+	return program
+}
+
+// parseStatement parses a statement based on the type of the current token.
+func (p *Parser) parseStatement() ast.Statement {
+	switch p.curToken.Type {
+	case token.LET:
+		return p.parseLetStatement()
+	default:
+		return nil
+	}
+}
+
+func (p *Parser) parseLetStatement() *ast.LetStatement {
+	// Construct an *ast.LetStatement node with the token the parser is
+	// currently sitting on (a token.LET token).
+	stmt := &ast.LetStatement{Token: p.curToken}
+
+	// Advance the tokens while making assertions about the next token.
+	if !p.expectPeek(token.IDENT) {
+		return nil
+	}
+
+	// Use the token.IDENT token to construct an *ast.Identifier node.
+	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
+
+	// Expect an equals sign; the expression following it is skipped for now
+	// (see the TODO below).
+	if !p.expectPeek(token.ASSIGN) {
+		return nil
+	}
+
+	// TODO: We're skipping the expressions until we
+	// encounter a semicolon.
+	for !p.curTokenIs(token.SEMICOLON) {
+		p.nextToken()
+	}
+
+	return stmt
+}
+
+// expectPeek is one of the parser's "assertion functions". It enforces the
+// correctness of the order of tokens by checking the type of the next
+// token.
+func (p *Parser) expectPeek(t token.TokenType) bool {
+	if p.peekTokenIs(t) {
+		p.nextToken()
+		return true
+	} else {
+		return false
+	}
+}
+
+func (p *Parser) peekTokenIs(t token.TokenType) bool {
+	return p.peekToken.Type == t
+}
+
+func (p *Parser) curTokenIs(t token.TokenType) bool {
+	return p.curToken.Type == t
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..941aac3
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,67 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/cedrickchee/hou/ast"
+	"github.com/cedrickchee/hou/lexer"
+)
+
+func TestLetStatements(t *testing.T) {
+	input := `
+let x = 5;
+let y = 10;
+let foobar = 838383;
+`
+	l := lexer.New(input)
+	p := New(l)
+
+	program := p.ParseProgram()
+	if program == nil {
+		t.Fatalf("ParseProgram() returned nil")
+	}
+	if len(program.Statements) != 3 {
+		t.Fatalf("program.Statements does not contain 3 statements. got=%d",
+			len(program.Statements))
+	}
+
+	tests := []struct {
+		expectedIdentifier string
+	}{
+		{"x"},
+		{"y"},
+		{"foobar"},
+	}
+
+	for i, tt := range tests {
+		stmt := program.Statements[i]
+		if !testLetStatement(t, stmt, tt.expectedIdentifier) {
+			return
+		}
+	}
+}
+
+func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
+	if s.TokenLiteral() != "let" {
+		t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
+		return false
+	}
+
+	letStmt, ok := s.(*ast.LetStatement)
+	if !ok {
+		t.Errorf("s not *ast.LetStatement. got=%T", s)
+		return false
+	}
+
+	if letStmt.Name.Value != name {
+		t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
+		return false
+	}
+
+	if letStmt.Name.TokenLiteral() != name {
+		t.Errorf("letStmt.Name.TokenLiteral() not '%s'. got=%s", name, letStmt.Name.TokenLiteral())
+		return false
+	}
+
+	return true
+}
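
Usage sketch, not part of this change: a minimal, hypothetical main package showing how lexer.New, parser.New, ParseProgram and the new AST nodes fit together. It assumes the lexer package added in earlier commits; the main package, the input string, and the printed placeholder are illustrative only.

package main

import (
	"fmt"

	"github.com/cedrickchee/hou/ast"
	"github.com/cedrickchee/hou/lexer"
	"github.com/cedrickchee/hou/parser"
)

func main() {
	// Tokenize and parse a single let statement.
	input := "let answer = 42;"

	l := lexer.New(input)
	p := parser.New(l)
	program := p.ParseProgram()

	// Only let statements are parsed so far, and their Value expressions are
	// still skipped, so print just the bound identifier.
	for _, stmt := range program.Statements {
		if letStmt, ok := stmt.(*ast.LetStatement); ok {
			fmt.Printf("%s %s = <expression not parsed yet>\n",
				letStmt.TokenLiteral(), letStmt.Name.Value)
		}
	}
}

Running this against `let answer = 42;` should print `let answer = <expression not parsed yet>`, since parseLetStatement currently skips every token between the identifier and the semicolon.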
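
A note on the two-token window, also not part of the diff: for the input `let x = 5;`, parseLetStatement starts with curToken on the token.LET token and peekToken on the identifier, and each successful expectPeek advances both cursors. Roughly (token.INT is assumed from the existing token package; it is not referenced in this diff):

    curToken=LET        peekToken=IDENT("x")  expectPeek(token.IDENT) advances
    curToken=IDENT("x") peekToken=ASSIGN      expectPeek(token.ASSIGN) advances
    curToken=ASSIGN     peekToken=INT("5")    TODO loop: nextToken
    curToken=INT("5")   peekToken=SEMICOLON   TODO loop: nextToken
    curToken=SEMICOLON                        loop exits, stmt is returned

Back in ParseProgram, the trailing p.nextToken() then moves curToken past the semicolon to the start of the next statement (or token.EOF).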