feat: parsing let statements #18

Merged 11 commits on Apr 7, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -22,3 +22,6 @@ go.work

presentations/**/node_modules
presentations/**/dist

# IntelliJ Editor files
.idea/
104 changes: 104 additions & 0 deletions src/ast/ast.go
@@ -0,0 +1,104 @@
/*
Package ast provides functionality to represent YARTBML Programs as an
Abstract Syntax Tree (Parse Tree).

Programs in YARTBML are a series of statements.

A fully valid program written in YARTBML is the following:

let x = 10;
let y = 15;

let add = fn(a, b) {
return a + b;
}

Here we see three statements: three variable bindings, i.e. let statements of the following form:

let <identifier> = <expression>;

A let statement consists of two changing parts: an identifier and an expression.
In the example above, x, y, and add are identifiers. 10, 15, and the function literal are expressions.

The difference between an expression and a statement is the following: expressions produce values and statements don't.
A `return 5;` statement doesn't produce a value, but `add(5, 5)` does.

We will be building this AST (of statements and expressions) and applying Pratt parsing for our language.
*/
package ast

import (
"YARTBML/token"
)

// Nodes are going to contain our language's constructs of
// "Expression(s)" or "Statement(s)". Each node will be used
// to build our AST (Abstract Syntax Tree), aka Parse Tree.
// Every node will provide the literal value of the token
// it is associated with. The method itself will be used solely
// for debugging purposes.
type Node interface {
TokenLiteral() string
}

// Statements don't produce values; a Statement node represents
// a language construct that doesn't explicitly return a value.
type Statement interface {
Node
statementNode()
}

// Expressions produce values that should be handled.
type Expression interface {
Node
expressionNode()
}

// Our programs are a series of statements.
// This is the root node of our AST.
type Program struct {
Statements []Statement
}

// Returns the literal of the token associated with the first
// statement in the program, acting as the root node's literal.
func (p *Program) TokenLiteral() string {
if len(p.Statements) > 0 {
return p.Statements[0].TokenLiteral()
} else {
return ""
}
}

// Represents a Let "Statement" within our AST to indicate an identifier
// that holds a value. A Let Statement has `Name` to hold the identifier
// of the binding and `Value` for the expression that produces the value.
type LetStatement struct {
Token token.Token // token.LET token
Name *Identifier
Value Expression
}

// Implementing the Statement interface on LetStatement
func (ls *LetStatement) statementNode() {}

// Implementing the Node interface on LetStatement
func (ls *LetStatement) TokenLiteral() string {
return ls.Token.Literal
}

// Holds the identifier of the binding in the [LetStatement]:
// the x in `let x = 5;`. Its Value is the name of the
// identifier in the [LetStatement].
type Identifier struct {
Token token.Token // token.IDENT token
Value string
}

// Implementing the Expression interface on an Identifier, as when the
// identifier is referenced in other parts of a program, it
// will produce a value.
func (i *Identifier) expressionNode() {}

// Implementing the Node interface on the Identifier
func (i *Identifier) TokenLiteral() string {
return i.Token.Literal
}
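
To see how these pieces fit together, here is a minimal sketch (not part of this diff) that hand-builds the AST for `let x = 5;` and reads back its root token literal. It assumes the token package exposes a Token struct with Type and Literal fields plus LET and IDENT constants, consistent with their usage in the lexer and parser below.

package main

import (
	"fmt"

	"YARTBML/ast"
	"YARTBML/token"
)

func main() {
	// Hand-built AST for `let x = 5;`. Value is left nil here,
	// mirroring the parser below, which skips expressions for now.
	program := &ast.Program{
		Statements: []ast.Statement{
			&ast.LetStatement{
				Token: token.Token{Type: token.LET, Literal: "let"},
				Name: &ast.Identifier{
					Token: token.Token{Type: token.IDENT, Literal: "x"},
					Value: "x",
				},
			},
		},
	}

	fmt.Println(program.TokenLiteral()) // prints "let"
}
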
21 changes: 20 additions & 1 deletion src/lexer/lexer.go
@@ -1,3 +1,7 @@
// Package lexer provides functionality to tokenize input strings into tokens in the YARTBML Programming Language.
// The lexer (lexical analyzer) reads the input string character by character, identifying tokens such as identifiers,
// keywords, operators, and literals, and creating corresponding tokens.
// Each token has a type and a literal value associated with it.
package lexer

import "YARTBML/token"
@@ -9,12 +13,15 @@ type Lexer struct {
ch byte // current char under examination
}

// Initialize a new Lexer with the given program contents as a string input.
func New(input string) *Lexer {
l := &Lexer{input: input}
l.readChar()
return l
}

// Reads the next character from the input string
// and advances the lexer's position.
func (l *Lexer) readChar() {
if l.readPosition >= len(l.input) {
l.ch = 0
@@ -25,10 +32,12 @@ func (l *Lexer) readChar() {
l.readPosition += 1
}

// Create a new token with the given `TokenType` and character.
func newToken(tokenType token.TokenType, ch byte) token.Token {
return token.Token{Type: tokenType, Literal: string(ch)}
}

// Returns the NextToken from the input string (program contents).
func (l *Lexer) NextToken() token.Token {
var tok token.Token
l.skipWhitespace()
@@ -95,6 +104,9 @@ func (l *Lexer) NextToken() token.Token {
return tok
}

// Reads a contiguous run of letters from the input. Unless
// the resulting word is a keyword, it is read as an
// identifier token.
func (l *Lexer) readIdentifier() string {
position := l.position
for isLetter(l.ch) {
@@ -103,6 +115,9 @@ func (l *Lexer) readIdentifier() string {
return l.input[position:l.position]
}

// Reads a contiguous run of digits from the input and
// returns it as a number literal.
func (l *Lexer) readNumber() string {
position := l.position
for isDigit(l.ch) {
@@ -111,25 +126,29 @@ func (l *Lexer) readNumber() string {
return l.input[position:l.position]
}

// Verifies if a given character matches the regex [a-zA-Z_]
func isLetter(ch byte) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

// Verifies if a given character matches the regex [0-9]
func isDigit(ch byte) bool {
return '0' <= ch && ch <= '9'
}

// Consumes whitespace (including carriage returns and line feeds),
// as our language isn't whitespace sensitive.
func (l *Lexer) skipWhitespace() {
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
l.readChar()
}
}

// Returns the next character in the input string without advancing the lexer.
func (l *Lexer) peekChar() byte {
if l.readPosition >= len(l.input) {
return 0
} else {
return l.input[l.readPosition]
}
}
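
As a quick sanity check of the lexer on its own, here is a minimal sketch (not part of this diff) that tokenizes a one-line program until the EOF token is reached. It assumes the token package defines an EOF TokenType, as referenced by the parser below.

package main

import (
	"fmt"

	"YARTBML/lexer"
	"YARTBML/token"
)

func main() {
	l := lexer.New("let x = 5;")

	// Pull tokens until EOF, exactly the way the parser
	// below consumes the lexer via NextToken.
	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
		fmt.Printf("%+v\n", tok)
	}
}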

136 changes: 136 additions & 0 deletions src/parser/parser.go
@@ -0,0 +1,136 @@
// Package parser provides functionality to parse tokens into an abstract syntax tree (AST) in the YARTBML Programming Language.
// The parser analyzes tokens generated by the lexer and constructs an AST representing the program's structure.
// It defines grammar rules and recursively traverses the token stream to build the AST nodes.
// The implementation is a Top-Down Operator Precedence Parser (Pratt Parser).
package parser

import (
"YARTBML/ast"
"YARTBML/lexer"
"YARTBML/token"
"fmt"
)

// Parses each token received from the lexer and
// stores errors into a string slice as they are spotted
// in the provided YARTBML program (UTF-8 string).
type Parser struct {
l *lexer.Lexer // Lexer instance for tokenization
errors []string // Parsing errors encountered

curToken token.Token // Current token being parsed
peekToken token.Token // Next token to be parsed
}

// Creates a new instance of the Parser with a given Lexer.
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}

// read two tokens, so curToken and peekToken are both set
// acts exactly like lexer's position and readPosition (for lookaheads)
p.nextToken()
p.nextToken()

return p
}

// Advances the parser to the next token.
func (p *Parser) nextToken() {
p.curToken = p.peekToken
p.peekToken = p.l.NextToken()
}

// Parses the entire program and constructs the AST.
// Iterates over every token in the input until an EOF token is encountered.
// Since our programs are a series of statements, it attempts to parse each statement in sequence.
func (p *Parser) ParseProgram() *ast.Program {
program := &ast.Program{}
program.Statements = []ast.Statement{}

for p.curToken.Type != token.EOF {
stmt := p.parseStatement()
if stmt != nil {
program.Statements = append(program.Statements, stmt)
}
p.nextToken()
}

return program
}

// Parses each statement, creating a statement node and
// child expression nodes based on the type of statement
// encountered.
func (p *Parser) parseStatement() ast.Statement {
switch p.curToken.Type {
case token.LET:
return p.parseLetStatement()
default:
return nil
}
}

// Parse Let Statements down to Name-Identifier Node and Value-Expression Node
func (p *Parser) parseLetStatement() *ast.LetStatement {
// Construct LetStatement Node
stmt := &ast.LetStatement{Token: p.curToken}

if !p.expectPeek(token.IDENT) {
return nil
}

// Construct Identifier Node: IDENT token & Name of Identifier as Value
stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}

if !p.expectPeek(token.ASSIGN) {
return nil
}

// TODO: Skipping expressions until we encounter a semicolon
// TODO: Construct Expression
for !p.curTokenIs(token.SEMICOLON) {
p.nextToken()
}

return stmt
}

// Checks if the curToken's TokenType matches the given TokenType
func (p *Parser) curTokenIs(t token.TokenType) bool {
return p.curToken.Type == t
}

// Checks if the peekToken's (nextToken) TokenType matches given TokenType
func (p *Parser) peekTokenIs(t token.TokenType) bool {
return p.peekToken.Type == t
}

// Checks if the nextToken is the given TokenType. Essentially, a lookahead by one
// in order to confirm the next token. If the given token is not expected, then
// we generate an error to append into the errors array that is part of the
// current [Parser] instance.
func (p *Parser) expectPeek(t token.TokenType) bool {
if p.peekTokenIs(t) {
p.nextToken()
return true
} else {
p.peekError(t)
return false
}
}

// Returns all parsing errors encountered
func (p *Parser) Errors() []string {
return p.errors
}

// Appends to errors property of the Parser Instance when the nextToken
// is not what is expected.
func (p *Parser) peekError(t token.TokenType) {
msg := fmt.Sprintf("expected next token to be %s, got %s instead",
t, p.peekToken.Type)
p.errors = append(p.errors, msg)
}
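
Putting the packages together, here is a minimal end-to-end sketch (not part of this diff): lex a program, parse it into an *ast.Program, and report any parser errors. Only let statements are parsed so far, and their value expressions are skipped, so each statement simply reports its leading token literal.

package main

import (
	"fmt"

	"YARTBML/lexer"
	"YARTBML/parser"
)

func main() {
	input := `
let x = 10;
let y = 15;
`

	l := lexer.New(input)
	p := parser.New(l)
	program := p.ParseProgram()

	// Surface any parsing errors collected along the way.
	for _, err := range p.Errors() {
		fmt.Println("parser error:", err)
	}

	// Each parsed statement reports the literal of its leading token ("let").
	for _, stmt := range program.Statements {
		fmt.Println(stmt.TokenLiteral())
	}
}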