4.4 Data Types (arrays)

* Support arrays in lexer
* Parse array literals
cedrickchee · Apr 1, 2020 · 1459f03 · 1459f03
1 parent 02b8bcf
commit 1459f03
Show file tree

Hide file tree

Showing 6 changed files with 113 additions and 14 deletions.
diff --git a/ast/ast.go b/ast/ast.go
@@ -386,3 +386,30 @@ func (sl *StringLiteral) TokenLiteral() string { return sl.Token.Literal }
 
 // String returns a stringified version of the AST for debugging.
 func (sl *StringLiteral) String() string { return sl.Token.Literal }
+
+// ArrayLiteral represents the array literal and holds a list of expressions.
+type ArrayLiteral struct {
+	Token    token.Token // the '[' token
+	Elements []Expression
+}
+
+func (al *ArrayLiteral) expressionNode() {}
+
+// TokenLiteral returns the literal value of the token associated with this node.
+func (al *ArrayLiteral) TokenLiteral() string { return al.Token.Literal }
+
+// String returns a stringified version of the AST for debugging.
+func (al *ArrayLiteral) String() string {
+	var out bytes.Buffer
+
+	elements := []string{}
+	for _, el := range al.Elements {
+		elements = append(elements, el.String())
+	}
+
+	out.WriteString("[")
+	out.WriteString(strings.Join(elements, ", "))
+	out.WriteString("]")
+
+	return out.String()
+}
diff --git a/lexer/lexer.go b/lexer/lexer.go
@@ -76,6 +76,10 @@ func (l *Lexer) NextToken() token.Token {
 	case '"':
 		tok.Type = token.STRING
 		tok.Literal = l.readString()
+	case '[':
+		tok = newToken(token.LBRACKET, l.ch)
+	case ']':
+		tok = newToken(token.RBRACKET, l.ch)
 	case 0:
 		tok.Literal = ""
 		tok.Type = token.EOF

diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
@@ -30,6 +30,7 @@ if (5 < 10) {
 10 != 9;
 "foobar"
 "foo bar"
+[1, 2];
 `
 
 	tests := []struct {
@@ -111,6 +112,12 @@ if (5 < 10) {
 		{token.SEMICOLON, ";"},
 		{token.STRING, "foobar"},
 		{token.STRING, "foo bar"},
+		{token.LBRACKET, "["},
+		{token.INT, "1"},
+		{token.COMMA, ","},
+		{token.INT, "2"},
+		{token.RBRACKET, "]"},
+		{token.SEMICOLON, ";"},
 		{token.EOF, ""},
 	}
 

diff --git a/parser/parser.go b/parser/parser.go
@@ -87,6 +87,7 @@ func New(l *lexer.Lexer) *Parser {
 	p.registerPrefix(token.LPAREN, p.parseGroupedExpression)
 	p.registerPrefix(token.IF, p.parseIfExpression)
 	p.registerPrefix(token.FUNCTION, p.parseFunctionLiteral)
+	p.registerPrefix(token.LBRACKET, p.parseArrayLiteral)
 
 	p.infixParseFns = make(map[token.TokenType]infixParseFn)
 	p.registerInfix(token.PLUS, p.parseInfixExpression)
@@ -459,38 +460,48 @@ func (p *Parser) parseFunctionParameters() []*ast.Identifier {
 
 func (p *Parser) parseCallExpression(function ast.Expression) ast.Expression {
 	exp := &ast.CallExpression{Token: p.curToken, Function: function}
-	exp.Arguments = p.parseCallArguments()
+	exp.Arguments = p.parseExpressionList(token.RPAREN)
 	return exp
 }
 
-// Parse the function's argument list.
-func (p *Parser) parseCallArguments() []ast.Expression {
+// parseExpressionList parses a list of comma-separated expressions.
+func (p *Parser) parseExpressionList(end token.TokenType) []ast.Expression {
 	// This method looks strikingly similar to parseFunctionParameters, except
 	// that it's more generic and returns a slice of ast.Expression and not
 	// *ast.Identifier (because call expression AST structure is:
 	// <expression>(<comma separated expressions>))
+	// The end parameter tells the method which token signifies the end of the
+	// list.
 
-	args := []ast.Expression{}
+	list := []ast.Expression{}
 
-	if p.peekTokenIs(token.RPAREN) {
+	if p.peekTokenIs(end) {
 		p.nextToken()
-		return args
+		return list
 	}
 
 	p.nextToken()
-	args = append(args, p.parseExpression(LOWEST))
+	list = append(list, p.parseExpression(LOWEST))
 
 	for p.peekTokenIs(token.COMMA) {
 		p.nextToken()
 		p.nextToken()
-		args = append(args, p.parseExpression(LOWEST))
+		list = append(list, p.parseExpression(LOWEST))
 	}
 
-	if !p.expectPeek(token.RPAREN) {
+	if !p.expectPeek(end) {
 		return nil
 	}
 
-	return args
+	return list
+}
+
+func (p *Parser) parseArrayLiteral() ast.Expression {
+	array := &ast.ArrayLiteral{Token: p.curToken}
+
+	array.Elements = p.parseExpressionList(token.RBRACKET)
+
+	return array
 }
 
 // "assertion functions".

diff --git a/parser/parser_test.go b/parser/parser_test.go
@@ -742,6 +742,54 @@ func TestStringLiteralExpression(t *testing.T) {
 	}
 }
 
+func TestParsingArrayLiterals(t *testing.T) {
+	// Test makes sure that parsing array literals results in a
+	// *ast.ArrayLiteral being returned.
+
+	input := "[1, 2 * 2, 3 + 3]"
+
+	l := lexer.New(input)
+	p := New(l)
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+
+	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
+	array, ok := stmt.Expression.(*ast.ArrayLiteral)
+	if !ok {
+		t.Fatalf("exp not ast.ArrayLiteral. got=%T", stmt.Expression)
+	}
+
+	if len(array.Elements) != 3 {
+		t.Fatalf("len(array.Elements) not 3. got=%d", len(array.Elements))
+	}
+
+	testIntegerLiteral(t, array.Elements[0], 1)
+	testInfixExpression(t, array.Elements[1], 2, "*", 2)
+	testInfixExpression(t, array.Elements[2], 3, "+", 3)
+}
+
+func TestParsingEmptyArrayLiterals(t *testing.T) {
+	// Test for empty array literals to make sure that we don't run into nasty
+	// edge-cases.
+
+	input := "[]"
+
+	l := lexer.New(input)
+	p := New(l)
+	program := p.ParseProgram()
+	checkParserErrors(t, p)
+
+	stmt, ok := program.Statements[0].(*ast.ExpressionStatement)
+	array, ok := stmt.Expression.(*ast.ArrayLiteral)
+	if !ok {
+		t.Fatalf("exp not ast.ArrayLiteral. got=%T", stmt.Expression)
+	}
+
+	if len(array.Elements) != 0 {
+		t.Errorf("len(array.Elements) not 0. got=%d", len(array.Elements))
+	}
+}
+
 func testIntegerLiteral(t *testing.T, il ast.Expression, value int64) bool {
 	integ, ok := il.(*ast.IntegerLiteral)
 	if !ok {

diff --git a/token/token.go b/token/token.go
@@ -40,10 +40,12 @@ const (
 	COMMA     = "," // a comma
 	SEMICOLON = ";" // a semi-colon
 
-	LPAREN = "(" // a left paranthesis
-	RPAREN = ")" // a right parenthesis
-	LBRACE = "{" // a left brace
-	RBRACE = "}" // a right brace
+	LPAREN   = "(" // a left parenthesis
+	RPAREN   = ")" // a right parenthesis
+	LBRACE   = "{" // a left brace
+	RBRACE   = "}" // a right brace
+	LBRACKET = "[" // a left bracket
+	RBRACKET = "]" // a right bracket
 
 	//
 	// Keywords