Skip to content

Commit

Permalink
updates ANTLR Go lexer and parser
Browse files Browse the repository at this point in the history
  • Loading branch information
ArquintL committed Mar 9, 2023
1 parent 62d6349 commit 8039bcf
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 39 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ lazy val gobra = (project in file("."))
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.9", // for SystemUtils
libraryDependencies += "org.apache.commons" % "commons-text" % "1.9", // for escaping strings in parser preprocessor
libraryDependencies += "commons-codec" % "commons-codec" % "1.15", // for obtaining the hex encoding of a string
libraryDependencies += "org.antlr" % "antlr4-runtime" % "4.9.2",
libraryDependencies += "org.antlr" % "antlr4-runtime" % "4.12.0",

scalacOptions ++= Seq(
"-encoding", "UTF-8", // Enforce UTF-8, instead of relying on properly set locales
Expand Down
24 changes: 20 additions & 4 deletions src/main/antlr4/GoLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@
* https://golang.org/ref/spec
*/

// Imported to Gobra from https://github.com/antlr/grammars-v4/blob/4c06ad8cc8130931c75ca0b17cbc1453f3830cd2/golang

// Imported to Gobra from https://github.com/antlr/grammars-v4/blob/fae6a8500e9c6a1ec895fca1495b0384b9144091/golang

lexer grammar GoLexer;

Expand Down Expand Up @@ -145,7 +144,7 @@ HEX_FLOAT_LIT : '0' [xX] HEX_MANTISSA HEX_EXPONENT
fragment HEX_MANTISSA : ('_'? HEX_DIGIT)+ ('.' ( '_'? HEX_DIGIT )*)?
| '.' HEX_DIGIT ('_'? HEX_DIGIT)*;

fragment HEX_EXPONENT : [pP] [+-] DECIMALS;
fragment HEX_EXPONENT : [pP] [+-]? DECIMALS;


IMAGINARY_LIT : (DECIMAL_LIT | BINARY_LIT | OCTAL_LIT | HEX_LIT | FLOAT_LIT) 'i' -> mode(NLSEMI);
Expand All @@ -172,42 +171,54 @@ BIG_U_VALUE: '\\' 'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGI

RAW_STRING_LIT : '`' ~'`'* '`' -> mode(NLSEMI);
INTERPRETED_STRING_LIT : '"' (~["\\] | ESCAPED_VALUE)* '"' -> mode(NLSEMI);
// Hidden tokens
// Runs of spaces and tabs; routed to the hidden channel.
WS : [ \t]+ -> channel(HIDDEN);
// Block comment: non-greedy match from '/*' up to the first '*/'; routed to the hidden channel.
COMMENT : '/*' .*? '*/' -> channel(HIDDEN);
// Runs of carriage-return / line-feed characters; routed to the hidden channel.
TERMINATOR : [\r\n]+ -> channel(HIDDEN);
// Line comment: '//' followed by everything up to (but not including) the next CR or LF.
LINE_COMMENT : '//' ~[\r\n]* -> channel(HIDDEN);

fragment UNICODE_VALUE: ~[\r\n'] | LITTLE_U_VALUE | BIG_U_VALUE | ESCAPED_VALUE;
// Fragments
// A backslash escape sequence. After the backslash, exactly one of:
//   - 'u' followed by 4 hex digits
//   - 'U' followed by 8 hex digits
//   - a single character from the set a b f n r t v \ ' "
//   - exactly 3 octal digits
//   - 'x' followed by 2 hex digits
fragment ESCAPED_VALUE
: '\\' ('u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
| 'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
| [abfnrtv\\'"]
| OCTAL_DIGIT OCTAL_DIGIT OCTAL_DIGIT
| 'x' HEX_DIGIT HEX_DIGIT)
;
// One decimal digit followed by any number of further digits, each of which
// may be preceded by a single '_' separator. A leading or trailing '_' does not match.
fragment DECIMALS
: [0-9] ('_'? [0-9])*
;
// A single octal digit (0-7).
fragment OCTAL_DIGIT
: [0-7]
;
// A single hexadecimal digit; both lower- and upper-case a-f are accepted.
fragment HEX_DIGIT
: [0-9a-fA-F]
;
// A single binary digit (0 or 1).
fragment BIN_DIGIT
: [01]
;
// Exponent part: 'e' or 'E', an optional '+' or '-' sign, then DECIMALS.
fragment EXPONENT
: [eE] [+-]? DECIMALS
;
// A letter is either a UNICODE_LETTER or the underscore character.
fragment LETTER
: UNICODE_LETTER
| '_'
;
fragment UNICODE_DIGIT
: [\p{Nd}]
/* [\u0030-\u0039]
| [\u0660-\u0669]
| [\u06F0-\u06F9]
Expand All @@ -229,6 +240,7 @@ fragment UNICODE_DIGIT
| [\u1810-\u1819]
| [\uFF10-\uFF19]*/
;
fragment UNICODE_LETTER
: [\p{L}]
/* [\u0041-\u005A]
Expand Down Expand Up @@ -494,7 +506,11 @@ fragment UNICODE_LETTER
| [\uFFDA-\uFFDC]
*/
;
mode NLSEMI;
// Treat whitespace as normal
WS_NLSEMI : [ \t]+ -> channel(HIDDEN);
// Ignore any comments that only span one line
Expand All @@ -504,4 +520,4 @@ LINE_COMMENT_NLSEMI : '//' ~[\r\n]* -> channel(HIDDEN);
//return to normal lexing
EOS: ([\r\n]+ | ';' | '/*' .*? '*/' | EOF) -> mode(DEFAULT_MODE);
// Did not find an EOS, so go back to normal lexing
OTHER: -> mode(DEFAULT_MODE), channel(HIDDEN);
OTHER: -> mode(DEFAULT_MODE), channel(HIDDEN);
62 changes: 28 additions & 34 deletions src/main/antlr4/GoParser.g4
Original file line number Diff line number Diff line change
@@ -1,38 +1,32 @@
/*
[The "BSD licence"]
Copyright (c) 2017 Sasa Coh, Michał Błotniak
Copyright (c) 2019 Ivan Kochurkin, kvanttt@gmail.com, Positive Technologies
Copyright (c) 2019 Dmitry Rassadin, flipparassa@gmail.com, Positive Technologies
Copyright (c) 2021 Martin Mirchev, mirchevmartin2203@gmail.com
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
[The "BSD licence"] Copyright (c) 2017 Sasa Coh, Michał Błotniak Copyright (c) 2019 Ivan Kochurkin,
kvanttt@gmail.com, Positive Technologies Copyright (c) 2019 Dmitry Rassadin,
flipparassa@gmail.com, Positive Technologies All rights reserved. Copyright (c) 2021 Martin Mirchev,
mirchevmartin2203@gmail.com
Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met: 1. Redistributions of source code must retain the
above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in
binary form must reproduce the above copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided with the distribution. 3. The name
of the author may not be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/

/*
* A Go grammar for ANTLR 4 derived from the Go Language Specification https://golang.org/ref/spec
*/

// Imported to Gobra from https://github.com/antlr/grammars-v4/blob/4c06ad8cc8130931c75ca0b17cbc1453f3830cd2/golang
// Imported to Gobra from https://github.com/antlr/grammars-v4/tree/fae6a8500e9c6a1ec895fca1495b0384b9144091/golang

parser grammar GoParser;

Expand Down Expand Up @@ -87,7 +81,7 @@ varSpec:

// A block is an optional statementList enclosed between L_CURLY and R_CURLY tokens.
block: L_CURLY statementList? R_CURLY;

statementList: (eos? statement eos)+;
statementList: ((SEMI? | EOS? | {this.closingBracket()}?) statement eos)+;

statement:
declaration
Expand Down Expand Up @@ -194,7 +188,7 @@ commCase: CASE (sendStmt | recvStmt) | DEFAULT;

// Receive statement: an optional left-hand side (either `expressionList ASSIGN`
// or `identifierList DECLARE_ASSIGN`) followed by an expression labelled `recvExpr`.
recvStmt: (expressionList ASSIGN | identifierList DECLARE_ASSIGN)? recvExpr = expression;

forStmt: FOR (expression | forClause | rangeClause)? block;
forStmt: FOR (expression? | forClause | rangeClause?) block;

// Three-part `for` header: an optional simpleStmt labelled `initStmt`, an eos,
// an optional expression, another eos, and an optional simpleStmt labelled `postStmt`.
forClause:
initStmt = simpleStmt? eos expression? eos postStmt = simpleStmt?;
Expand Down Expand Up @@ -384,5 +378,5 @@ eos:
SEMI
| EOF
| EOS
| {closingBracket()}?
;
| {this.closingBracket()}?
;

0 comments on commit 8039bcf

Please sign in to comment.