feat: parsing let statements #18

Merged 11 commits on Apr 7, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -22,3 +22,6 @@ go.work

presentations/**/node_modules
presentations/**/dist

# IntelliJ Editor files
.idea/
104 changes: 104 additions & 0 deletions src/ast/ast.go
@@ -0,0 +1,104 @@
/*
Package ast provides functionality to represent YARTBML Programs as an
Abstract Syntax Tree (Parse Tree).

Programs in YARTBML are a series of statements.

A fully valid program written in YARTBML is the following:

let x = 10;
let y = 15;

let add = fn(a, b) {
return a + b;
}

Here we see three statements: three variable bindings, i.e. let statements of the following form:

let <identifier> = <expression>;

A let statement consists of two changing parts: an identifier and an expression.
In the example above, x, y, and add are identifiers. 10, 15, and the function literal are expressions.

The difference between an expression and a statement is the following: expressions produce values and statements don't.
A `return 5;` statement doesn't produce a value, but `add(5, 5)` does.

We will be building this AST (of statements and expressions) and applying Pratt parsing for our language.
*/
package ast

import (
"YARTBML/token"
)

// Nodes are going to contain our language's constructs of
// "Expression(s)" or "Statement(s)". Each node will be used
// to build our AST (Abstract Syntax Tree), aka Parse Tree.
// Every node will provide the literal value of the token
// it is associated with. The method itself will be used solely
// for debugging purposes.
type Node interface {
TokenLiteral() string
}

// Statements don't produce values; a Statement node represents
// a language construct that doesn't explicitly return a value.
type Statement interface {
Node
statementNode()
}

// Expressions produce values that should be handled.
type Expression interface {
Node
expressionNode()
}

// Our programs are a series of statements.
// This is the root node of our AST.
type Program struct {
Statements []Statement
}

// Returns the literal of the token associated with the first
// statement in the program, acting as the root node's literal.
func (p *Program) TokenLiteral() string {
if len(p.Statements) > 0 {
return p.Statements[0].TokenLiteral()
} else {
return ""
}
}

// Represents a Let "Statement" within our AST to indicate an identifier
// that holds a value. A Let Statement has `Name` to hold the identifier
// of the binding and `Value` for the expression that produces the value.
type LetStatement struct {
Token token.Token // token.LET token
Name *Identifier
Value Expression
}

// Implementing the Statement interface on LetStatement
func (ls *LetStatement) statementNode() {}

// Implementing the Node interface on LetStatement
func (ls *LetStatement) TokenLiteral() string {
return ls.Token.Literal
}

// Holds the identifier of the binding in the [LetStatement]:
// the x in `let x = 5;`. Its Value is the name of the
// identifier in the [LetStatement].
type Identifier struct {
Token token.Token // token.IDENT token
Value string
}

// Implementing the Expression interface on an Identifier, as when the
// identifier is referenced in other parts of a program, it
// will produce a value.
func (i *Identifier) expressionNode() {}

// Implementing the Node interface on the Identifier
func (i *Identifier) TokenLiteral() string {
return i.Token.Literal
}
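
To see how these pieces fit together, here is a minimal sketch (not part of this diff) that hand-builds the AST for `let x = 5;` and reads back its root token literal. It assumes the token package exposes a Token struct with Type and Literal fields plus LET and IDENT constants, consistent with their usage in the lexer and parser below.

package main

import (
	"fmt"

	"YARTBML/ast"
	"YARTBML/token"
)

func main() {
	// Hand-built AST for `let x = 5;`. Value is left nil here,
	// mirroring the parser below, which skips expressions for now.
	program := &ast.Program{
		Statements: []ast.Statement{
			&ast.LetStatement{
				Token: token.Token{Type: token.LET, Literal: "let"},
				Name: &ast.Identifier{
					Token: token.Token{Type: token.IDENT, Literal: "x"},
					Value: "x",
				},
			},
		},
	}

	fmt.Println(program.TokenLiteral()) // prints "let"
}
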
21 changes: 20 additions & 1 deletion src/lexer/lexer.go
@@ -1,3 +1,7 @@
// Package lexer provides functionality to tokenize input strings into tokens in the YARTBML Programming Language.
// The lexer (lexical analyzer) reads the input string character by character, identifying tokens such as identifiers,
// keywords, operators, and literals, and creating corresponding tokens.
// Each token has a type and a literal value associated with it.
package lexer

import "YARTBML/token"
@@ -9,12 +13,15 @@ type Lexer struct {
ch byte // current char under examination
}

// Initialize a new Lexer with the given program contents as a string input.
func New(input string) *Lexer {
l := &Lexer{input: input}
l.readChar()
return l
}

// Reads the next character from the input string
// and advances the lexer's position.
func (l *Lexer) readChar() {
if l.readPosition >= len(l.input) {
l.ch = 0
@@ -25,10 +32,12 @@ func (l *Lexer) readChar() {
l.readPosition += 1
}

// Create a new token with the given `TokenType` and character.
func newToken(tokenType token.TokenType, ch byte) token.Token {
return token.Token{Type: tokenType, Literal: string(ch)}
}

// Returns the NextToken from the input string (program contents).
func (l *Lexer) NextToken() token.Token {
var tok token.Token
l.skipWhitespace()
@@ -95,6 +104,9 @@ func (l *Lexer) NextToken() token.Token {
return tok
}

// Reads a contiguous run of letters from the input. Unless
// the resulting word is a keyword, it is read as an
// identifier token.
func (l *Lexer) readIdentifier() string {
position := l.position
for isLetter(l.ch) {
@@ -103,6 +115,9 @@ func (l *Lexer) readIdentifier() string {
return l.input[position:l.position]
}

// Reads a contiguous run of digits from the input and
// returns it as a number literal.
func (l *Lexer) readNumber() string {
position := l.position
for isDigit(l.ch) {
@@ -111,25 +126,29 @@ func (l *Lexer) readNumber() string {
return l.input[position:l.position]
}

// Verifies if a given character matches the regex [a-zA-Z_]
func isLetter(ch byte) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

// Verifies if a given character matches the regex [0-9]
func isDigit(ch byte) bool {
return '0' <= ch && ch <= '9'
}

// Consumes whitespace (including carriage returns and line feeds),
// as our language isn't whitespace sensitive.
func (l *Lexer) skipWhitespace() {
for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
l.readChar()
}
}

// Returns the next character in the input string without advancing the lexer.
func (l *Lexer) peekChar() byte {
if l.readPosition >= len(l.input) {
return 0
} else {
return l.input[l.readPosition]
}
}
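
As a quick sanity check of the lexer on its own, here is a minimal sketch (not part of this diff) that tokenizes a one-line program until the EOF token is reached. It assumes the token package defines an EOF TokenType, as referenced by the parser below.

package main

import (
	"fmt"

	"YARTBML/lexer"
	"YARTBML/token"
)

func main() {
	l := lexer.New("let x = 5;")

	// Pull tokens until EOF, exactly the way the parser
	// below consumes the lexer via NextToken.
	for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() {
		fmt.Printf("%+v\n", tok)
	}
}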

136 changes: 136 additions & 0 deletions src/parser/parser.go
@@ -0,0 +1,136 @@
// Package parser provides functionality to parse tokens into an abstract syntax tree (AST) in the YARTBML Programming Language.
// The parser analyzes tokens generated by the lexer and constructs an AST representing the program's structure.
// It defines grammar rules and recursively traverses the token stream to build the AST nodes.
// The implementation is a Top-Down Operator Precedence Parser (Pratt Parser).
package parser

import (
"YARTBML/ast"
"YARTBML/lexer"
"YARTBML/token"
"fmt"
)

// Parses each token received from the lexer and
// stores errors into a string slice as they are spotted
// in the provided YARTBML program (UTF-8 string).
type Parser struct {
l *lexer.Lexer // Lexer instance for tokenization
errors []string // Parsing errors encountered

curToken token.Token // Current token being parsed
peekToken token.Token // Next token to be parsed
}

// Creates a new instance of the Parser with a given Lexer.
func New(l *lexer.Lexer) *Parser {
p := &Parser{
l: l,
errors: []string{},
}

// read two tokens, so curToken and peekToken are both set
// acts exactly like lexer's position and readPosition (for lookaheads)
p.nextToken()
p.nextToken()

return p
}

// Advances the parser to the next token.
func (p *Parser) nextToken() {
p.curToken = p.peekToken
p.peekToken = p.l.NextToken()
}

// Parses the entire program and constructs the AST.
// Iterates over every token in the input until an EOF token is encountered.
// Since our programs are a series of statements, it attempts to parse each statement in sequence.
func (p *Parser) ParseProgram() *ast.Program {
program := &ast.Program{}
program.Statements = []ast.Statement{}

for p.curToken.Type != token.EOF {
stmt := p.parseStatement()
if stmt != nil {
program.Statements = append(program.Statements, stmt)
}
p.nextToken()
}

return program
}

// Parses each statement, creating a statement node and
// child expression nodes based on the type of statement
// encountered.
func (p *Parser) parseStatement() ast.Statement {
switch p.curToken.Type {
case token.LET:
return p.parseLetStatement()
default:
return nil
}
}

// Parse Let Statements down to Name-Identifier Node and Value-Expression Node
func (p *Parser) parseLetStatement() *ast.LetStatement {
// Construct LetStatement Node
stmt := &ast.LetStatement{Token: p.curToken}

if !p.expectPeek(token.IDENT) {
return nil
}

// Construct Identifier Node: IDENT token & Name of Identifier as Value
stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}

if !p.expectPeek(token.ASSIGN) {
return nil
}

// TODO: Skipping expressions until we encounter a semicolon
// TODO: Construct Expression
for !p.curTokenIs(token.SEMICOLON) {
p.nextToken()
}

return stmt
}

// Checks if the curToken's TokenType matches the given TokenType
func (p *Parser) curTokenIs(t token.TokenType) bool {
return p.curToken.Type == t
}

// Checks if the peekToken's (nextToken) TokenType matches given TokenType
func (p *Parser) peekTokenIs(t token.TokenType) bool {
return p.peekToken.Type == t
}

// Checks if the nextToken is the given TokenType. Essentially, a lookahead by one
// in order to confirm the next token. If the given token is not expected, then
// we generate an error to append into the errors array that is part of the
// current [Parser] instance.
func (p *Parser) expectPeek(t token.TokenType) bool {
if p.peekTokenIs(t) {
p.nextToken()
return true
} else {
p.peekError(t)
return false
}
}

// Returns all parsing errors encountered
func (p *Parser) Errors() []string {
return p.errors
}

// Appends to errors property of the Parser Instance when the nextToken
// is not what is expected.
func (p *Parser) peekError(t token.TokenType) {
msg := fmt.Sprintf("expected next token to be %s, got %s instead",
t, p.peekToken.Type)
p.errors = append(p.errors, msg)
}
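
Putting the packages together, here is a minimal end-to-end sketch (not part of this diff): lex a program, parse it into an *ast.Program, and report any parser errors. Only let statements are parsed so far, and their value expressions are skipped, so each statement simply reports its leading token literal.

package main

import (
	"fmt"

	"YARTBML/lexer"
	"YARTBML/parser"
)

func main() {
	input := `
let x = 10;
let y = 15;
`

	l := lexer.New(input)
	p := parser.New(l)
	program := p.ParseProgram()

	// Surface any parsing errors collected along the way.
	for _, err := range p.Errors() {
		fmt.Println("parser error:", err)
	}

	// Each parsed statement reports the literal of its leading token ("let").
	for _, stmt := range program.Statements {
		fmt.Println(stmt.TokenLiteral())
	}
}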