-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Lexer tests and token struct * fix: module naming and import resolution and start of lexer Because we don't have our package published / setup to publish properly, we can't pull a package from the github domain. * Functions for lexer to read chars in strings. Added tests. * Support for parsing numbers and eating white space * Added more tokens to lexer. Created lookahead functionality. * Implementation of REPL. Can run from command line. * ast and parser packages Created ast package which contains base, statement, expression node interfaces. Also created parser package which imports the ast, lexer, and token packages, contains token functions, statement functions, peek functions, has an error function, and began the ParseProgram() function. Created parser_test.go package as well. * fix: Remove IntelliJ Editor Project config files * style: apply go fmt * feat(docs): inline docs to all existing packages --------- Co-authored-by: Joseph Porrino <joeyporrino1998@gmail.com> Co-authored-by: Dinesh Umasankar <dinesh71uma@gmail.com>
- Loading branch information
1 parent
c6580c4
commit 06ee0fa
Showing
7 changed files
with
360 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,6 @@ go.work | |
|
||
presentations/**/node_modules | ||
presentations/**/dist | ||
|
||
# IntelliJ Editor files | ||
.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
Package ast provides functionality to represent YARTBML Programs as an | ||
Abstract Syntax Tree (Parse Tree). | ||
Programs in YARTBML are a series of statements. | ||
A fully valid program written in YARTBML is the following: | ||
let x = 10; | ||
let y = 15; | ||
let add = fn(a, b) { | ||
return a + b; | ||
} | ||
We can see three statements, three variable binding - let statements of the following form: | ||
let <identifier> = <expression>; | ||
A let statement consists of two changing parts: an identifier and an expression. | ||
In the example above, x and y and add are identifiers. 10, 15, and the function literal are expressions. | ||
The difference between an expression and a statement is the following: Expressions produce values and statements don't. | ||
A `return 5;` statement doesn't produce a value, but add(5, 5) does. | ||
We will be using this AST (of statements and expressions) and apply Pratt Parsing for our language. | ||
*/ | ||
package ast | ||
|
||
import ( | ||
"YARTBML/token" | ||
) | ||
|
||
// Node is the base interface implemented by every node in the AST
// (Abstract Syntax Tree, aka Parse Tree). Nodes represent our
// language's constructs: expressions and statements. Every node
// provides the literal value of the token it is associated with;
// TokenLiteral is used solely for debugging purposes.
type Node interface {
	TokenLiteral() string
}
|
||
// Statement is a Node that does not produce a value; it represents a
// construct (such as a let statement) that binds or directs rather
// than evaluates. The unexported statementNode method is a compile-time
// marker that keeps statements distinct from expressions.
type Statement interface {
	Node
	statementNode()
}
|
||
// Expression is a Node that produces a value which should be handled.
// The unexported expressionNode method is a compile-time marker that
// keeps expressions distinct from statements.
type Expression interface {
	Node
	expressionNode()
}
|
||
// Program is the root node of every AST our parser produces.
// A program in our language is simply a series of statements.
type Program struct {
	Statements []Statement // top-level statements, in source order
}
|
||
func (p *Program) TokenLiteral() string { | ||
if len(p.Statements) > 0 { | ||
return p.Statements[0].TokenLiteral() | ||
} else { | ||
return "" | ||
} | ||
} | ||
|
||
// LetStatement represents a let statement within our AST: an identifier
// bound to a value, as in `let x = 5;`. Name holds the identifier of
// the binding and Value holds the expression that produces the value.
type LetStatement struct {
	Token token.Token // the token.LET token
	Name  *Identifier // identifier being bound (the `x` in `let x = 5;`)
	Value Expression  // expression producing the bound value
}
|
||
// statementNode marks LetStatement as implementing the Statement interface.
func (ls *LetStatement) statementNode() {}
|
||
// TokenLiteral implements the Node interface for LetStatement,
// returning the literal of the underlying token.LET token.
func (ls *LetStatement) TokenLiteral() string {
	return ls.Token.Literal
}
|
||
// Identifier holds the identifier of a binding in a [LetStatement] —
// the x in `let x = 5;`. Value is the name of the identifier itself.
type Identifier struct {
	Token token.Token // the token.IDENT token
	Value string      // the identifier's name, e.g. "x"
}
|
||
// expressionNode marks Identifier as implementing the Expression
// interface: when an identifier is referenced in other parts of a
// program, it produces a value.
func (i *Identifier) expressionNode() {}
|
||
// TokenLiteral implements the Node interface for Identifier,
// returning the literal of the underlying token.IDENT token.
func (i *Identifier) TokenLiteral() string {
	return i.Token.Literal
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
// Package parser provides functionality to parse tokens into an abstract syntax tree (AST) in the YARTBML Programming Language. | ||
// The parser analyzes tokens generated by the lexer and constructs an AST representing the program's structure. | ||
// It defines grammar rules and recursively traverses the token stream to build the AST nodes. | ||
// The implementation is a Top-Down Operator Precedence Parser (Pratt Parser). | ||
package parser | ||
|
||
import ( | ||
"YARTBML/ast" | ||
"YARTBML/lexer" | ||
"YARTBML/token" | ||
"fmt" | ||
) | ||
|
||
// Parser consumes tokens produced by the lexer and builds the AST,
// collecting error messages as malformed constructs are encountered.
type Parser struct {
	l      *lexer.Lexer // lexer instance supplying the token stream
	errors []string     // parsing errors encountered so far

	curToken  token.Token // current token being parsed
	peekToken token.Token // next token (one-token lookahead)
}
|
||
// Creates a new instance of the Parser with a given Lexer. | ||
func New(l *lexer.Lexer) *Parser { | ||
p := &Parser{ | ||
l: l, | ||
errors: []string{}, | ||
} | ||
|
||
// read two tokens, so curToken and peekToken are both set | ||
// acts exactly like lexer's position and readPosition (for lookaheads) | ||
p.nextToken() | ||
p.nextToken() | ||
|
||
return p | ||
} | ||
|
||
// nextToken advances the parser by one token: the lookahead token
// becomes current, and a fresh token is pulled from the lexer.
func (p *Parser) nextToken() {
	p.curToken = p.peekToken
	p.peekToken = p.l.NextToken()
}
|
||
// Parses the entire program and constructs the ast. | ||
// Iterates over every token in the input until EOF token is encountered. | ||
// Since our programs are a series of statements, it attempts to parse every statement in a sequence. | ||
func (p *Parser) ParseProgram() *ast.Program { | ||
program := &ast.Program{} | ||
program.Statements = []ast.Statement{} | ||
|
||
for p.curToken.Type != token.EOF { | ||
stmt := p.parseStatement() | ||
if stmt != nil { | ||
program.Statements = append(program.Statements, stmt) | ||
} | ||
p.nextToken() | ||
} | ||
|
||
return program | ||
} | ||
|
||
// Parses each statement and create a statement node and | ||
// child Expression nodes based on the type of statement node | ||
// encountered. | ||
func (p *Parser) parseStatement() ast.Statement { | ||
switch p.curToken.Type { | ||
case token.LET: | ||
return p.parseLetStatement() | ||
default: | ||
return nil | ||
} | ||
} | ||
|
||
// Parse Let Statements down to Name-Identifier Node and Value-Expression Node | ||
func (p *Parser) parseLetStatement() *ast.LetStatement { | ||
// Construct LetStatement Node | ||
stmt := &ast.LetStatement{Token: p.curToken} | ||
|
||
if !p.expectPeek(token.IDENT) { | ||
return nil | ||
} | ||
|
||
// Construct Identifier Node: IDENT token & Name of Identifier as Value | ||
stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal} | ||
|
||
if !p.expectPeek(token.ASSIGN) { | ||
return nil | ||
} | ||
|
||
// TODO: Skipping expressions until we encounter a semicolon | ||
// TODO: Construct Expression | ||
for !p.curTokenIs(token.SEMICOLON) { | ||
p.nextToken() | ||
} | ||
|
||
return stmt | ||
} | ||
|
||
// curTokenIs reports whether the current token's type matches t.
func (p *Parser) curTokenIs(t token.TokenType) bool {
	return p.curToken.Type == t
}
|
||
// peekTokenIs reports whether the next (lookahead) token's type matches t.
func (p *Parser) peekTokenIs(t token.TokenType) bool {
	return p.peekToken.Type == t
}
|
||
// Checks if the nextToken is the given TokenType. Essentially, a lookahead by one | ||
// in order to confirm the next token. If the given token is not expected, then | ||
// we generate an error to append into the errors array that is part of the | ||
// current [Parser] instance. | ||
func (p *Parser) expectPeek(t token.TokenType) bool { | ||
if p.peekTokenIs(t) { | ||
p.nextToken() | ||
return true | ||
} else { | ||
p.peekError(t) | ||
return false | ||
} | ||
} | ||
|
||
// Errors returns all parsing errors encountered so far.
func (p *Parser) Errors() []string {
	return p.errors
}
|
||
// Appends to errors property of the Parser Instance when the nextToken | ||
// is not what is expected. | ||
func (p *Parser) peekError(t token.TokenType) { | ||
msg := fmt.Sprintf("expected next token to be %s, got %s instead", | ||
t, p.peekToken.Type) | ||
p.errors = append(p.errors, msg) | ||
} |
Oops, something went wrong.