diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d92aff58dc..e284631a5c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -127,8 +127,8 @@ jobs: - name: Install trgen shell: pwsh run: | - dotnet tool install -g trgen --version 0.16.1 - dotnet tool install -g trwdog --version 0.16.1 + dotnet tool install -g trgen --version 0.16.3 + dotnet tool install -g trwdog --version 0.16.3 if ("${{ matrix.os }}" -eq "ubuntu-latest") { echo "$HOME/.dotnet/tools" >> $env:GITHUB_PATH } diff --git a/_scripts/really-run.ps1 b/_scripts/really-run.ps1 index 6d43e895c6..431643198d 100644 --- a/_scripts/really-run.ps1 +++ b/_scripts/really-run.ps1 @@ -14,8 +14,8 @@ $antlrPath = _scripts/get-antlr.ps1 "4.10" # Set up env as it is used in test script. echo "antlr_path=$antlrPath" >> $env:GITHUB_ENV -dotnet tool install -g trgen --version 0.16.1 -dotnet tool install -g trwdog --version 0.16.1 +dotnet tool install -g trgen --version 0.16.3 +dotnet tool install -g trwdog --version 0.16.3 # Call test script. $env:ANTLR_JAR_PATH="$antlrPath" diff --git a/_scripts/regtest.sh b/_scripts/regtest.sh index 51773cfe3e..fb7d267e50 100644 --- a/_scripts/regtest.sh +++ b/_scripts/regtest.sh @@ -164,11 +164,11 @@ setupdeps() date echo "Setting up trgen and antlr jar." 
dotnet tool uninstall -g trgen - dotnet tool install -g trgen --version 0.16.1 + dotnet tool install -g trgen --version 0.16.3 dotnet tool uninstall -g trxml2 - dotnet tool install -g trxml2 --version 0.16.1 + dotnet tool install -g trxml2 --version 0.16.3 dotnet tool uninstall -g trwdog - dotnet tool install -g trwdog --version 0.16.1 + dotnet tool install -g trwdog --version 0.16.3 case "${unameOut}" in Linux*) curl 'https://repo1.maven.org/maven2/org/antlr/antlr4/4.10/antlr4-4.10-complete.jar' -o /tmp/antlr4-4.10-complete.jar;; Darwin*) curl 'https://repo1.maven.org/maven2/org/antlr/antlr4/4.10/antlr4-4.10-complete.jar' -o /tmp/antlr4-4.10-complete.jar;; diff --git a/_scripts/skip-cpp.txt b/_scripts/skip-cpp.txt index d09a1c8a6c..ba43abd406 100644 --- a/_scripts/skip-cpp.txt +++ b/_scripts/skip-cpp.txt @@ -6,7 +6,6 @@ alpaca angelscript antlr/antlr2 antlr/antlr3 -antlr/antlr4 apex apt arithmetic diff --git a/_scripts/skip-python3.txt b/_scripts/skip-python3.txt index 0a7760e2cf..88f07173a9 100644 --- a/_scripts/skip-python3.txt +++ b/_scripts/skip-python3.txt @@ -1,7 +1,6 @@ _grammar-test antlr/antlr2 antlr/antlr3 -antlr/antlr4 apex asm/asmMASM asm/masm diff --git a/antlr/antlr4/ANTLRv4Lexer.g4 b/antlr/antlr4/ANTLRv4Lexer.g4 index 2625b194f4..d72e8f7b1f 100644 --- a/antlr/antlr4/ANTLRv4Lexer.g4 +++ b/antlr/antlr4/ANTLRv4Lexer.g4 @@ -328,6 +328,11 @@ ARGUMENT_CONTENT : . ; +// TODO: This grammar and the one used in the Intellij Antlr4 plugin differ +// for "actions". This needs to be resolved at some point. 
+// The Intellij Antlr4 grammar is here: +// https://github.com/antlr/intellij-plugin-v4/blob/1f36fde17f7fa63cb18d7eeb9cb213815ac658fb/src/main/antlr/org/antlr/intellij/plugin/parser/ANTLRv4Lexer.g4#L587 + // ------------------------- // Target Language Actions // diff --git a/antlr/antlr4/ANTLRv4Parser.g4 b/antlr/antlr4/ANTLRv4Parser.g4 index 3ca05954b2..2f7ad1ffc3 100644 --- a/antlr/antlr4/ANTLRv4Parser.g4 +++ b/antlr/antlr4/ANTLRv4Parser.g4 @@ -212,7 +212,7 @@ labeledAlt // Lexer rules lexerRuleSpec - : FRAGMENT? TOKEN_REF COLON lexerRuleBlock SEMI + : FRAGMENT? TOKEN_REF optionsSpec? COLON lexerRuleBlock SEMI ; lexerRuleBlock diff --git a/antlr/antlr4/CSharp/LexerAdaptor.cs b/antlr/antlr4/CSharp/LexerAdaptor.cs index d2ad701792..f7e80fe87d 100644 --- a/antlr/antlr4/CSharp/LexerAdaptor.cs +++ b/antlr/antlr4/CSharp/LexerAdaptor.cs @@ -35,6 +35,7 @@ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. public abstract class LexerAdaptor : Lexer { private static readonly int PREQUEL_CONSTRUCT = -10; + private static readonly int OPTIONS_CONSTRUCT = -11; // I copy a reference to the stream, so It can be used as a Char Stream, not as a IISStream readonly ICharStream stream; @@ -45,16 +46,16 @@ public abstract class LexerAdaptor : Lexer protected LexerAdaptor(ICharStream input) : base(input, Console.Out, Console.Error) { - CurrentRuleType = TokenConstants.InvalidType; - _insideOptionsBlock = false; + CurrentRuleType = TokenConstants.InvalidType; + _insideOptionsBlock = false; stream = input; } protected LexerAdaptor(ICharStream input, TextWriter output, TextWriter errorOutput) : base(input, output, errorOutput) { - CurrentRuleType = TokenConstants.InvalidType; - _insideOptionsBlock = false; + CurrentRuleType = TokenConstants.InvalidType; + _insideOptionsBlock = false; stream = input; } @@ -107,20 +108,6 @@ protected void handleEndAction() } } - protected void handleOptionsLBrace() - { - if (_insideOptionsBlock) - { - Type = ANTLRv4Lexer.BEGIN_ACTION; 
- PushMode(ANTLRv4Lexer.TargetLanguageAction); - } - else - { - Type = ANTLRv4Lexer.LBRACE; - _insideOptionsBlock = true; - } - } - private bool InLexerRule { get { return CurrentRuleType == ANTLRv4Lexer.TOKEN_REF; } @@ -133,16 +120,27 @@ public override IToken Emit() // enter prequel construct ending with an RBRACE CurrentRuleType = PREQUEL_CONSTRUCT; } + else if (Type == ANTLRv4Lexer.OPTIONS && CurrentRuleType == ANTLRv4Lexer.TOKEN_REF) + { + CurrentRuleType = OPTIONS_CONSTRUCT; + } else if (Type == ANTLRv4Lexer.RBRACE && CurrentRuleType == PREQUEL_CONSTRUCT) { // exit prequel construct CurrentRuleType = TokenConstants.InvalidType; } + else if (Type == ANTLRv4Lexer.RBRACE && CurrentRuleType == OPTIONS_CONSTRUCT) + { // exit options + CurrentRuleType = ANTLRv4Lexer.TOKEN_REF; + } else if (Type == ANTLRv4Lexer.AT && CurrentRuleType == TokenConstants.InvalidType) { // enter action CurrentRuleType = ANTLRv4Lexer.AT; } + else if (Type == ANTLRv4Lexer.SEMI && CurrentRuleType == OPTIONS_CONSTRUCT) + { // ';' in options { .... }. Don't change anything. + } else if (Type == ANTLRv4Lexer.END_ACTION && CurrentRuleType == ANTLRv4Lexer.AT) { // exit action @@ -155,7 +153,6 @@ public override IToken Emit() { Type = ANTLRv4Lexer.TOKEN_REF; } - if (char.IsLower(firstChar)) { Type = ANTLRv4Lexer.RULE_REF; @@ -177,8 +174,8 @@ public override IToken Emit() public override void Reset() { - CurrentRuleType = TokenConstants.InvalidType; - _insideOptionsBlock = false; + CurrentRuleType = TokenConstants.InvalidType; + _insideOptionsBlock = false; base.Reset(); } } diff --git a/antlr/antlr4/Cpp/ANTLRv4Lexer.g4 b/antlr/antlr4/Cpp/ANTLRv4Lexer.g4 new file mode 100644 index 0000000000..c96864c5a2 --- /dev/null +++ b/antlr/antlr4/Cpp/ANTLRv4Lexer.g4 @@ -0,0 +1,407 @@ +/* + * [The "BSD license"] + * Copyright (c) 2012-2015 Terence Parr + * Copyright (c) 2012-2015 Sam Harwell + * Copyright (c) 2015 Gerald Rosenberg + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +/** + * A grammar for ANTLR v4 implemented using v4 syntax + * + * Modified 2015.06.16 gbr + * -- update for compatibility with Antlr v4.5 + */ + +// ====================================================== +// Lexer specification +// ====================================================== + +lexer grammar ANTLRv4Lexer; + +options { superClass = LexerAdaptor; } +import LexBasic; + +@header +{ +#include "LexerAdaptor.h" +} + + +// Standard set of fragments +tokens { TOKEN_REF , RULE_REF , LEXER_CHAR_SET } +channels { OFF_CHANNEL , COMMENT } + +// ------------------------- +// Comments +DOC_COMMENT + : DocComment -> channel (COMMENT) + ; + +BLOCK_COMMENT + : BlockComment -> channel (COMMENT) + ; + +LINE_COMMENT + : LineComment -> channel (COMMENT) + ; + +// ------------------------- +// Integer + +INT + : DecimalNumeral + ; + +// ------------------------- +// Literal string +// +// ANTLR makes no distinction between a single character literal and a +// multi-character string. All literals are single quote delimited and +// may contain unicode escape sequences of the form \uxxxx, where x +// is a valid hexadecimal number (per Unicode standard). +STRING_LITERAL + : SQuoteLiteral + ; + +UNTERMINATED_STRING_LITERAL + : USQuoteLiteral + ; + +// ------------------------- +// Arguments +// +// Certain argument lists, such as those specifying call parameters +// to a rule invocation, or input parameters to a rule specification +// are contained within square brackets. +BEGIN_ARGUMENT + : LBrack + { this->handleBeginArgument(); } + ; + +// ------------------------- +// Target Language Actions +BEGIN_ACTION + : LBrace -> pushMode (TargetLanguageAction) + ; + +// ------------------------- +// Keywords +// +// 'options', 'tokens', and 'channels' are considered keywords +// but only when followed by '{', and considered as a single token. +// Otherwise, the symbols are tokenized as RULE_REF and allowed as +// an identifier in a labeledElement. 
+OPTIONS : 'options' WSNLCHARS* '{' ; +TOKENS : 'tokens' WSNLCHARS* '{' ; +CHANNELS : 'channels' WSNLCHARS* '{' ; + +fragment WSNLCHARS : ' ' | '\t' | '\f' | '\n' | '\r' ; + +IMPORT + : 'import' + ; + +FRAGMENT + : 'fragment' + ; + +LEXER + : 'lexer' + ; + +PARSER + : 'parser' + ; + +GRAMMAR + : 'grammar' + ; + +PROTECTED + : 'protected' + ; + +PUBLIC + : 'public' + ; + +PRIVATE + : 'private' + ; + +RETURNS + : 'returns' + ; + +LOCALS + : 'locals' + ; + +THROWS + : 'throws' + ; + +CATCH + : 'catch' + ; + +FINALLY + : 'finally' + ; + +MODE + : 'mode' + ; + // ------------------------- + // Punctuation + +COLON + : Colon + ; + +COLONCOLON + : DColon + ; + +COMMA + : Comma + ; + +SEMI + : Semi + ; + +LPAREN + : LParen + ; + +RPAREN + : RParen + ; + +LBRACE + : LBrace + ; + +RBRACE + : RBrace + ; + +RARROW + : RArrow + ; + +LT + : Lt + ; + +GT + : Gt + ; + +ASSIGN + : Equal + ; + +QUESTION + : Question + ; + +STAR + : Star + ; + +PLUS_ASSIGN + : PlusAssign + ; + +PLUS + : Plus + ; + +OR + : Pipe + ; + +DOLLAR + : Dollar + ; + +RANGE + : Range + ; + +DOT + : Dot + ; + +AT + : At + ; + +POUND + : Pound + ; + +NOT + : Tilde + ; + // ------------------------- + // Identifiers - allows unicode rule/token names + +ID + : Id + ; + // ------------------------- + // Whitespace + +WS + : Ws+ -> channel (OFF_CHANNEL) + ; + +// ------------------------- +// Illegal Characters +// +// This is an illegal character trap which is always the last rule in the +// lexer specification. It matches a single character of any value and being +// the last rule in the file will match when no other rule knows what to do +// about the character. It is reported as an error but is not passed on to the +// parser. This means that the parser will deal with the grammar file anyway +// but we will not try to analyse or code generate from a file with lexical +// errors. + +// Comment this rule out to allow the error to be propagated to the parser +ERRCHAR + : .
-> channel (HIDDEN) + ; + +// ====================================================== +// Lexer modes +// ------------------------- +// Arguments +mode Argument; +// E.g., [int x, List a[]] +NESTED_ARGUMENT + : LBrack -> type (ARGUMENT_CONTENT) , pushMode (Argument) + ; + +ARGUMENT_ESCAPE + : EscAny -> type (ARGUMENT_CONTENT) + ; + +ARGUMENT_STRING_LITERAL + : DQuoteLiteral -> type (ARGUMENT_CONTENT) + ; + +ARGUMENT_CHAR_LITERAL + : SQuoteLiteral -> type (ARGUMENT_CONTENT) + ; + +END_ARGUMENT + : RBrack + { this->handleEndArgument(); } + ; + +// added this to return non-EOF token type here. EOF does something weird +UNTERMINATED_ARGUMENT + : EOF -> popMode + ; + +ARGUMENT_CONTENT + : . + ; + +// ------------------------- +// Target Language Actions +// +// Many language targets use {} as block delimiters and so we +// must recursively match {} delimited blocks to balance the +// braces. Additionally, we must make some assumptions about +// literal string representation in the target language. We assume +// that they are delimited by ' or " and so consume these +// in their own alts so as not to inadvertently match {}. +mode TargetLanguageAction; +NESTED_ACTION + : LBrace -> type (ACTION_CONTENT) , pushMode (TargetLanguageAction) + ; + +ACTION_ESCAPE + : EscAny -> type (ACTION_CONTENT) + ; + +ACTION_STRING_LITERAL + : DQuoteLiteral -> type (ACTION_CONTENT) + ; + +ACTION_CHAR_LITERAL + : SQuoteLiteral -> type (ACTION_CONTENT) + ; + +ACTION_DOC_COMMENT + : DocComment -> type (ACTION_CONTENT) + ; + +ACTION_BLOCK_COMMENT + : BlockComment -> type (ACTION_CONTENT) + ; + +ACTION_LINE_COMMENT + : LineComment -> type (ACTION_CONTENT) + ; + +END_ACTION + : RBrace + { this->handleEndAction(); } + ; + +UNTERMINATED_ACTION + : EOF -> popMode + ; + +ACTION_CONTENT + : .
+ ; + +// ------------------------- +mode LexerCharSet; +LEXER_CHAR_SET_BODY + : (~ [\]\\] | EscAny)+ -> more + ; + +LEXER_CHAR_SET + : RBrack -> popMode + ; + +UNTERMINATED_CHAR_SET + : EOF -> popMode + ; + +// ------------------------------------------------------------------------------ +// Grammar specific Keywords, Punctuation, etc. +fragment Id + : NameStartChar NameChar* + ; + diff --git a/antlr/antlr4/Cpp/LexerAdaptor.cpp b/antlr/antlr4/Cpp/LexerAdaptor.cpp index 76ae8bd0c4..ab3388bd16 100644 --- a/antlr/antlr4/Cpp/LexerAdaptor.cpp +++ b/antlr/antlr4/Cpp/LexerAdaptor.cpp @@ -31,20 +31,23 @@ //using namespace antlr4; -LexerAdaptor::LexerAdaptor(CharStream *input) : Lexer(input) {} +LexerAdaptor::LexerAdaptor(antlr4::CharStream *input) : Lexer(input) { + currentRuleType = antlr4::Token::INVALID_TYPE; + _insideOptionsBlock = false; + PREQUEL_CONSTRUCT = -10; + OPTIONS_CONSTRUCT = -11; +// stream = input; +} -int -LexerAdaptor::getCurrentRuleType() { +int LexerAdaptor::getCurrentRuleType() { return currentRuleType; } -void -LexerAdaptor::setCurrentRuleType(int ruleType) { +void LexerAdaptor::setCurrentRuleType(int ruleType) { currentRuleType = ruleType; } -void -LexerAdaptor::handleBeginArgument() { +void LexerAdaptor::handleBeginArgument() { if (inLexerRule()) { pushMode(ANTLRv4Lexer::LexerCharSet); more(); @@ -53,49 +56,77 @@ LexerAdaptor::handleBeginArgument() { } } -void -LexerAdaptor::handleEndArgument() { +void LexerAdaptor::handleEndArgument() { popMode(); if (modeStack.size() > 0) { setType(ANTLRv4Lexer::ARGUMENT_CONTENT); } } -void -LexerAdaptor::handleEndAction() { - popMode(); - if (modeStack.size() > 0) { +void LexerAdaptor::handleEndAction() { + auto oldMode = mode; + auto newMode = popMode(); + + if (modeStack.size() > 0 && newMode == ANTLRv4Lexer::TargetLanguageAction && oldMode == newMode) { setType(ANTLRv4Lexer::ACTION_CONTENT); } } -Token* -LexerAdaptor::emit() { - if (type == ANTLRv4Lexer::ID) { - std::string firstChar = 
_input->getText(misc::Interval(tokenStartCharIndex, tokenStartCharIndex)); +antlr4::Token* LexerAdaptor::emit() { + if ((type == ANTLRv4Lexer::OPTIONS || type == ANTLRv4Lexer::TOKENS || type == ANTLRv4Lexer::CHANNELS) && currentRuleType == antlr4::Token::INVALID_TYPE) + { + // enter prequel construct ending with an RBRACE + currentRuleType = PREQUEL_CONSTRUCT; + } + else if (type == ANTLRv4Lexer::OPTIONS && currentRuleType == ANTLRv4Lexer::TOKEN_REF) + { + currentRuleType = OPTIONS_CONSTRUCT; + } + else if (type == ANTLRv4Lexer::RBRACE && currentRuleType == PREQUEL_CONSTRUCT) + { + // exit prequel construct + currentRuleType = antlr4::Token::INVALID_TYPE; + } + else if (type == ANTLRv4Lexer::RBRACE && currentRuleType == OPTIONS_CONSTRUCT) + { // exit options + currentRuleType = ANTLRv4Lexer::TOKEN_REF; + } + else if (type == ANTLRv4Lexer::AT && currentRuleType == antlr4::Token::INVALID_TYPE) + { + // enter action + currentRuleType = ANTLRv4Lexer::AT; + } + else if (type == ANTLRv4Lexer::SEMI && currentRuleType == OPTIONS_CONSTRUCT) + { // ';' in options { .... }. Don't change anything. 
+ } + else if (type == ANTLRv4Lexer::END_ACTION && currentRuleType == ANTLRv4Lexer::AT) + { + // exit action + currentRuleType = antlr4::Token::INVALID_TYPE; + } + else if (type == ANTLRv4Lexer::ID) + { + std::string firstChar = _input->getText(antlr4::misc::Interval(tokenStartCharIndex, tokenStartCharIndex)); if (isupper(firstChar.at(0))) { type = ANTLRv4Lexer::TOKEN_REF; } else { type = ANTLRv4Lexer::RULE_REF; } - if (currentRuleType == Token::INVALID_TYPE) { // if outside of rule def + if (currentRuleType == antlr4::Token::INVALID_TYPE) { // if outside of rule def currentRuleType = type; // set to inside lexer or parser rule } } else if (type == ANTLRv4Lexer::SEMI) { // exit rule def - currentRuleType = Token::INVALID_TYPE; + currentRuleType = antlr4::Token::INVALID_TYPE; } return Lexer::emit(); } -bool -LexerAdaptor::inLexerRule() { +bool LexerAdaptor::inLexerRule() { return currentRuleType == ANTLRv4Lexer::TOKEN_REF; } -bool -LexerAdaptor::inParserRule() { // not used, but added for clarity +bool LexerAdaptor::inParserRule() { // not used, but added for clarity return currentRuleType == ANTLRv4Lexer::RULE_REF; } - diff --git a/antlr/antlr4/Cpp/LexerAdaptor.h b/antlr/antlr4/Cpp/LexerAdaptor.h index d76f3e1a5b..2073659826 100644 --- a/antlr/antlr4/Cpp/LexerAdaptor.h +++ b/antlr/antlr4/Cpp/LexerAdaptor.h @@ -31,24 +31,20 @@ #include "antlr4-runtime.h" class LexerAdaptor : public antlr4::Lexer { + private: + int PREQUEL_CONSTRUCT; + int OPTIONS_CONSTRUCT; + bool _insideOptionsBlock; + int currentRuleType = antlr4::Token::INVALID_TYPE; public: LexerAdaptor(antlr4::CharStream *input); - int currentRuleType = antlr4::Token::INVALID_TYPE; - int getCurrentRuleType(); - void setCurrentRuleType(int ruleType); - void handleBeginArgument(); - void handleEndArgument(); - void handleEndAction(); - antlr4::Token* emit(); - bool inLexerRule(); - bool inParserRule(); }; diff --git a/antlr/antlr4/Java/LexerAdaptor.java b/antlr/antlr4/Java/LexerAdaptor.java index 
340c1e0eac..338000e6e7 100644 --- a/antlr/antlr4/Java/LexerAdaptor.java +++ b/antlr/antlr4/Java/LexerAdaptor.java @@ -38,6 +38,7 @@ public abstract class LexerAdaptor extends Lexer { * Generic type for OPTIONS, TOKENS and CHANNELS */ private static final int PREQUEL_CONSTRUCT = -10; + private static final int OPTIONS_CONSTRUCT = -11; public LexerAdaptor(CharStream input) { super(input); @@ -95,25 +96,23 @@ protected void handleEndAction() { } } - protected void handleOptionsLBrace() { - if (insideOptionsBlock) { - setType(ANTLRv4Lexer.BEGIN_ACTION); - pushMode(ANTLRv4Lexer.TargetLanguageAction); - } else { - setType(ANTLRv4Lexer.LBRACE); - insideOptionsBlock = true; - } - } - @Override public Token emit() { if ((_type == ANTLRv4Lexer.OPTIONS || _type == ANTLRv4Lexer.TOKENS || _type == ANTLRv4Lexer.CHANNELS) && getCurrentRuleType() == Token.INVALID_TYPE) { // enter prequel construct ending with an RBRACE setCurrentRuleType(PREQUEL_CONSTRUCT); + } else if (_type == ANTLRv4Lexer.OPTIONS && getCurrentRuleType() == ANTLRv4Lexer.TOKEN_REF) + { + setCurrentRuleType(OPTIONS_CONSTRUCT); } else if (_type == ANTLRv4Lexer.RBRACE && getCurrentRuleType() == PREQUEL_CONSTRUCT) { // exit prequel construct setCurrentRuleType(Token.INVALID_TYPE); + } else if (_type == ANTLRv4Lexer.RBRACE && getCurrentRuleType() == OPTIONS_CONSTRUCT) + { // exit options + setCurrentRuleType(ANTLRv4Lexer.TOKEN_REF); } else if (_type == ANTLRv4Lexer.AT && getCurrentRuleType() == Token.INVALID_TYPE) { // enter action setCurrentRuleType(ANTLRv4Lexer.AT); + } else if (_type == ANTLRv4Lexer.SEMI && getCurrentRuleType() == OPTIONS_CONSTRUCT) + { // ';' in options { .... }. Don't change anything. 
} else if (_type == ANTLRv4Lexer.END_ACTION && getCurrentRuleType() == ANTLRv4Lexer.AT) { // exit action setCurrentRuleType(Token.INVALID_TYPE); } else if (_type == ANTLRv4Lexer.ID) { diff --git a/antlr/antlr4/JavaScript/LexerAdaptor.js b/antlr/antlr4/JavaScript/LexerAdaptor.js index fe1eeec865..36f809f563 100644 --- a/antlr/antlr4/JavaScript/LexerAdaptor.js +++ b/antlr/antlr4/JavaScript/LexerAdaptor.js @@ -11,6 +11,7 @@ export default class LexerAdaptor extends antlr4.Lexer * Generic type for OPTIONS, TOKENS and CHANNELS */ this.PREQUEL_CONSTRUCT = -10; + this.OPTIONS_CONSTRUCT = -11; /** * Track whether we are inside of a rule and whether it is lexical parser. _currentRuleType==Token.INVALID_TYPE @@ -69,25 +70,20 @@ export default class LexerAdaptor extends antlr4.Lexer } } - handleOptionsLBrace() { - if (this.insideOptionsBlock == true) { - this._type = ANTLRv4Lexer.BEGIN_ACTION; - this.pushMode(ANTLRv4Lexer.TargetLanguageAction); - } else { - this._type = ANTLRv4Lexer.LBRACE; - this.insideOptionsBlock = true; - } - } - emit() { if ((this._type == ANTLRv4Lexer.OPTIONS || this._type == ANTLRv4Lexer.TOKENS || this._type == ANTLRv4Lexer.CHANNELS) && this.getCurrentRuleType() == antlr4.Token.INVALID_TYPE) { // enter prequel construct ending with an RBRACE this.setCurrentRuleType(this.PREQUEL_CONSTRUCT); + } else if (this._type == ANTLRv4Lexer.OPTIONS && this.getCurrentRuleType() == ANTLRv4Lexer.TOKEN_REF) { + this.setCurrentRuleType(this.OPTIONS_CONSTRUCT); } else if (this._type == ANTLRv4Lexer.RBRACE && this.getCurrentRuleType() == this.PREQUEL_CONSTRUCT) { // exit prequel construct this.setCurrentRuleType(antlr4.Token.INVALID_TYPE); + } else if (this._type == ANTLRv4Lexer.RBRACE && this.getCurrentRuleType() == this.OPTIONS_CONSTRUCT) { // exit options + this.setCurrentRuleType(ANTLRv4Lexer.TOKEN_REF); } else if (this._type == ANTLRv4Lexer.AT && this.getCurrentRuleType() == antlr4.Token.INVALID_TYPE) { // enter action this.setCurrentRuleType(ANTLRv4Lexer.AT); + } 
else if (this._type == ANTLRv4Lexer.SEMI && this.getCurrentRuleType() == this.OPTIONS_CONSTRUCT) { // ';' in options { .... }. Don't change anything. } else if (this._type == ANTLRv4Lexer.END_ACTION && this.getCurrentRuleType() == ANTLRv4Lexer.AT) { // exit action this.setCurrentRuleType(antlr4.Token.INVALID_TYPE); } else if (this._type == ANTLRv4Lexer.ID) { diff --git a/antlr/antlr4/Python3/LexerAdaptor.py b/antlr/antlr4/Python3/LexerAdaptor.py index 519f04995f..8dbb110423 100644 --- a/antlr/antlr4/Python3/LexerAdaptor.py +++ b/antlr/antlr4/Python3/LexerAdaptor.py @@ -43,7 +43,11 @@ class LexerAdaptor(Lexer): can only occur in lexical rules and arg actions cannot occur. """ + PREQUEL_CONSTRUCT = -10 + OPTIONS_CONSTRUCT = -11 + _currentRuleType = Token.INVALID_TYPE + insideOptionsBlock = False def __init__(self, inp, output): Lexer.__init__(self, inp, output) @@ -67,12 +71,28 @@ def handleEndArgument(self): self._type = self.ARGUMENT_CONTENT def handleEndAction(self): - self.popMode() - if len(self._modeStack) > 0: + oldMode = self._mode + newMode = self.popMode() + isActionWithinAction = len(self._modeStack) > 0 and newMode == self.TargetLanguageAction and oldMode == newMode + if isActionWithinAction: self._type = self.ACTION_CONTENT def emit(self): - if self._type == self.ID: + if (self._type == self.OPTIONS or self._type == self.TOKENS or self._type == self.CHANNELS) and self._currentRuleType == Token.INVALID_TYPE: + self._currentRuleType = self.PREQUEL_CONSTRUCT + elif self._type == self.OPTIONS and self._currentRuleType == self.TOKEN_REF: + self._currentRuleType = self.OPTIONS_CONSTRUCT + elif self._type == self.RBRACE and self._currentRuleType == self.PREQUEL_CONSTRUCT: + self._currentRuleType = Token.INVALID_TYPE + elif self._type == self.RBRACE and self._currentRuleType == self.OPTIONS_CONSTRUCT: + self._currentRuleType = self.TOKEN_REF + elif self._type == self.AT and self._currentRuleType == Token.INVALID_TYPE: + self._currentRuleType = self.AT + elif 
self._type == self.SEMI and self._currentRuleType == self.OPTIONS_CONSTRUCT: + self._currentRuleType = self._currentRuleType + elif self._type == self.END_ACTION and self._currentRuleType == self.AT: + self._currentRuleType = Token.INVALID_TYPE + elif self._type == self.ID: firstChar = self._input.getText(self._tokenStartCharIndex, self._tokenStartCharIndex) if firstChar[0].isupper(): self._type = self.TOKEN_REF diff --git a/antlr/antlr4/examples/PhpLexer.g4 b/antlr/antlr4/examples/PhpLexer.g4 new file mode 100644 index 0000000000..8af384fb38 --- /dev/null +++ b/antlr/antlr4/examples/PhpLexer.g4 @@ -0,0 +1,375 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org) +Copyright (c) 2019-2020, Student Main for php7, php8 support. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +lexer grammar PhpLexer; + +channels { PhpComments, ErrorLexem, SkipChannel } + +options { + superClass=PhpLexerBase; + caseInsensitive = true; +} + +SeaWhitespace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlText: ~[<#]+; +XmlStart: ' pushMode(XML); +PHPStartEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStart: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlScriptOpen: ' pushMode(INSIDE); +HtmlStyleOpen: ' pushMode(INSIDE); +HtmlComment: '' -> channel(HIDDEN); +HtmlDtd: ''; +HtmlOpen: '<' -> pushMode(INSIDE); +Shebang + : '#' { this.IsNewLineOrStart(-2) }? '!' ~[\r\n]* + ; +NumberSign: '#' ~'<'* -> more; +Error: . -> channel(ErrorLexem); + +// TODO: parse xml attributes. +mode XML; + +XmlText: ~'?'+; +XmlClose: '?>' -> popMode; +XmlText2: '?' -> type(XmlText); + +mode INSIDE; + +PHPStartEchoInside: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInside: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlClose: '>' { this.PushModeOnHtmlClose(); }; +HtmlSlashClose: '/>' -> popMode; +HtmlSlash: '/'; +HtmlEquals: '='; + +HtmlStartQuoteString: '\\'? '\'' -> pushMode(HtmlQuoteStringMode); +HtmlStartDoubleQuoteString: '\\'? '"' -> pushMode(HtmlDoubleQuoteStringMode); +HtmlHex: '#' HexDigit+ ; +HtmlDecimal: Digit+; +HtmlSpace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlName: HtmlNameStartChar HtmlNameChar*; +ErrorInside: . -> channel(ErrorLexem); + +mode HtmlQuoteStringMode; + +PHPStartEchoInsideQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideQuoteString: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlEndQuoteString: '\'' '\''? -> popMode; +HtmlQuoteString: ~[<']+; +ErrorHtmlQuote: . -> channel(ErrorLexem); + +mode HtmlDoubleQuoteStringMode; + +PHPStartEchoDoubleQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartDoubleQuoteString: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +HtmlEndDoubleQuoteString: '"' '"'? 
-> popMode; +HtmlDoubleQuoteString: ~[<"]+; +ErrorHtmlDoubleQuote: . -> channel(ErrorLexem); + +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. +// Php blocks can exist inside Script blocks too. +mode SCRIPT; + +ScriptText: ~'<'+; +// TODO: handle JS strings, but handle type(ScriptText); +//ScriptString2: '\'' (~'\'' | '\\' ('\r'? '\n' | .))* '\'' -> type(ScriptText); +HtmlScriptClose: '' -> popMode; +PHPStartInsideScriptEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideScript: PhpStartFragment -> channel(SkipChannel), pushMode(PHP); +ScriptText2: '<' -> type(ScriptText); + +mode STYLE; + +StyleBody: .*? '' -> popMode; + +mode PHP; + +PHPEnd: ('?' | '%' {this.HasAspTags()}?) '>' + | '' {this.HasPhpScriptTag()}?; +Whitespace: [ \t\r\n]+ -> channel(SkipChannel); +MultiLineComment: '/*' .*? '*/' -> channel(PhpComments); +SingleLineComment: '//' -> channel(SkipChannel), pushMode(SingleLineCommentMode); +ShellStyleComment: '#' -> channel(SkipChannel), pushMode(SingleLineCommentMode); + +AttributeStart: '#['; + +Abstract: 'abstract'; +Array: 'array'; +As: 'as'; +BinaryCast: 'binary'; +BoolType: 'bool' 'ean'?; +BooleanConstant: 'true' + | 'false'; +Break: 'break'; +Callable: 'callable'; +Case: 'case'; +Catch: 'catch'; +Class: 'class'; +Clone: 'clone'; +Const: 'const'; +Continue: 'continue'; +Declare: 'declare'; +Default: 'default'; +Do: 'do'; +DoubleCast: 'real'; +DoubleType: 'double'; +Echo: 'echo'; +Else: 'else'; +ElseIf: 'elseif'; +Empty: 'empty'; + +EndDeclare: 'enddeclare'; +EndFor: 'endfor'; +EndForeach: 'endforeach'; +EndIf: 'endif'; +EndSwitch: 'endswitch'; +EndWhile: 'endwhile'; + +Eval: 'eval'; +Exit: 'die'; +Extends: 'extends'; +Final: 'final'; +Finally: 'finally'; +FloatCast: 'float'; +For: 'for'; +Foreach: 'foreach'; +Function_: 'function'; +Global: 'global'; +Goto: 'goto'; +If: 'if'; +Implements: 'implements'; +Import: 'import'; +Include: 'include'; +IncludeOnce: 'include_once'; 
+InstanceOf: 'instanceof'; +InsteadOf: 'insteadof'; +Int8Cast: 'int8'; +Int16Cast: 'int16'; +Int64Type: 'int64'; +IntType: 'int' 'eger'?; +Interface: 'interface'; +IsSet: 'isset'; +List: 'list'; +LogicalAnd: 'and'; +LogicalOr: 'or'; +LogicalXor: 'xor'; +Match_: 'match'; +Namespace: 'namespace'; +New: 'new'; +Null: 'null'; +ObjectType: 'object'; +Parent_: 'parent'; +Partial: 'partial'; +Print: 'print'; +Private: 'private'; +Protected: 'protected'; +Public: 'public'; +Require: 'require'; +RequireOnce: 'require_once'; +Resource: 'resource'; +Return: 'return'; +Static: 'static'; +StringType: 'string'; +Switch: 'switch'; +Throw: 'throw'; +Trait: 'trait'; +Try: 'try'; +Typeof: 'clrtypeof'; +UintCast: 'uint' ('8' | '16' | '64')?; +UnicodeCast: 'unicode'; +Unset: 'unset'; +Use: 'use'; +Var: 'var'; +While: 'while'; +Yield: 'yield'; +From: 'from'; +LambdaFn: 'fn'; + +Get: '__get'; +Set: '__set'; +Call: '__call'; +CallStatic: '__callstatic'; +Constructor: '__construct'; +Destruct: '__destruct'; +Wakeup: '__wakeup'; +Sleep: '__sleep'; +Autoload: '__autoload'; +IsSet__: '__isset'; +Unset__: '__unset'; +ToString__: '__tostring'; +Invoke: '__invoke'; +SetState: '__set_state'; +Clone__: '__clone'; +DebugInfo: '__debuginfo'; +Namespace__: '__namespace__'; +Class__: '__class__'; +Traic__: '__trait__'; +Function__: '__function__'; +Method__: '__method__'; +Line__: '__line__'; +File__: '__file__'; +Dir__: '__dir__'; + +Spaceship: '<=>'; +Lgeneric: '<:'; +Rgeneric: ':>'; +DoubleArrow: '=>'; +Inc: '++'; +Dec: '--'; +IsIdentical: '==='; +IsNoidentical: '!=='; +IsEqual: '=='; +IsNotEq: '<>' + | '!='; +IsSmallerOrEqual: '<='; +IsGreaterOrEqual: '>='; +PlusEqual: '+='; +MinusEqual: '-='; +MulEqual: '*='; +Pow: '**'; +PowEqual: '**='; +DivEqual: '/='; +Concaequal: '.='; +ModEqual: '%='; +ShiftLeftEqual: '<<='; +ShiftRightEqual: '>>='; +AndEqual: '&='; +OrEqual: '|='; +XorEqual: '^='; +BooleanOr: '||'; +BooleanAnd: '&&'; + +NullCoalescing: '??'; +NullCoalescingEqual:'??='; + +ShiftLeft: '<<'; 
+/* Remaining operator and punctuation tokens, then literals, then the InterpolationString and SingleLineCommentMode lexer modes. */ ShiftRight: '>>'; +DoubleColon: '::'; +ObjectOperator: '->'; +NamespaceSeparator: '\\'; +Ellipsis: '...'; +Less: '<'; +Greater: '>'; +Ampersand: '&'; +Pipe: '|'; +Bang: '!'; +Caret: '^'; +Plus: '+'; +Minus: '-'; +Asterisk: '*'; +Percent: '%'; +Divide: '/'; +Tilde: '~'; +SuppressWarnings: '@'; +Dollar: '$'; +Dot: '.'; +QuestionMark: '?'; +OpenRoundBracket: '('; +CloseRoundBracket: ')'; +OpenSquareBracket: '['; +CloseSquareBracket: ']'; +OpenCurlyBracket: '{'; +CloseCurlyBracket: '}' +{ this.PopModeOnCurlyBracketClose(); }; /* the action calls a helper that is not defined in this grammar text; presumably it lives in the lexer base class - confirm */ +Comma: ','; +Colon: ':'; +SemiColon: ';'; +Eq: '='; +Quote: '\''; +BackQuote: '`'; + +VarName: '$' NameString; +Label: [a-z_][a-z_0-9]*; /* identifier-shaped token; on an equal-length match ANTLR picks the rule defined first, so the keyword rules listed earlier in this mode win over Label */ +Octal: '0' [0-7]+; +Decimal: '0' | NonZeroDigit Digit*; +Real: (Digit+ '.' Digit* | '.' Digit+) ExponentPart? + | Digit+ ExponentPart; +Hex: '0x' HexDigit+; +Binary: '0b' [01_]+; /* '_' is accepted among the binary digits */ + +BackQuoteString: '`' ~'`'* '`'; +SingleQuoteString: '\'' (~('\'' | '\\') | '\\' . )* '\''; /* a backslash followed by any one character is consumed as a pair, so an escaped quote does not end the string */ +DoubleQuote: '"' -> pushMode(InterpolationString); /* the body of a double-quoted string is tokenized by the InterpolationString mode below */ + +StartNowDoc + : '<<<' [ \t]* '\'' NameString '\'' { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +StartHereDoc + : '<<<' [ \t]* NameString { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; /* both rules are gated by the predicate ShouldPushHereDocMode(1), whose definition is not visible here */ +ErrorPhp: . -> channel(ErrorLexem); /* last rule of the mode: any single character no earlier rule matched is sent to the ErrorLexem channel */ + +mode InterpolationString; + +VarNameInInterpolation: '$' NameString -> type(VarName); // TODO: fix such cases: "$people->john" +DollarString: '$' -> type(StringPart); +CurlyDollar: '{' { this.IsCurlyDollar(1) }? { this.SetInsideString(); } -> channel(SkipChannel), pushMode(PHP); +CurlyString: '{' -> type(StringPart); +EscapedChar: '\\' . -> type(StringPart); +DoubleQuoteInInterpolation: '"' -> type(DoubleQuote), popMode; +UnicodeEscape: '\\u{' [a-z0-9][a-z0-9]+ '}'; /* NOTE(review): needs at least two characters between the braces and accepts every letter a-z - confirm whether single-digit escapes and a hex-only set were intended */ +StringPart: ~[${\\"]+; + +mode SingleLineCommentMode; + +Comment: ~[\r\n?]+ -> channel(PhpComments); /* '?' is excluded from the set so the two rules below get a chance to see '?' '>' */ +PHPEndSingleLineComment: '?' '>'; +CommentQuestionMark: '?' -> type(Comment), channel(PhpComments); +CommentEnd: [\r\n] -> channel(SkipChannel), popMode; // exit from comment. 
+ +mode HereDoc; // TODO: interpolation for heredoc strings. + +HereDocText: ~[\r\n]*? ('\r'? '\n' | '\r'); /* one token per line of text, line terminator included */ + +// fragments. +// '<?=' will be transformed to 'echo', e.g. '<?= "Hello world"; ?>' becomes '<?php echo "Hello world"; ?>' (NOTE(review): wording reconstructed after the tag text inside the quotes was stripped - confirm against upstream) +fragment PhpStartEchoFragment: '<' ('?' '=' | { this.HasAspTags() }? '%' '='); +fragment PhpStartFragment: '<' ('?' 'php'? | { this.HasAspTags() }? '%'); +fragment NameString options { caseInsensitive = false; }: [a-zA-Z_\u0080-\ufffe][a-zA-Z0-9_\u0080-\ufffe]*; /* rule-level option turns case-insensitivity off, which is why the sets spell out both a-z and A-Z */ +fragment HtmlNameChar options { caseInsensitive = false; } + : HtmlNameStartChar + | '-' + | '_' + | '.' + | Digit + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + ; +fragment HtmlNameStartChar options { caseInsensitive = false; } + : [:a-zA-Z] + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + ; +fragment ExponentPart: 'e' [+-]? Digit+; +fragment NonZeroDigit: [1-9_]; +fragment Digit: [0-9_]; +fragment HexDigit: [a-f0-9_]; /* NonZeroDigit, Digit and HexDigit all include '_' in their sets */ diff --git a/antlr/antlr4/examples/PhpParser.g4 b/antlr/antlr4/examples/PhpParser.g4 new file mode 100644 index 0000000000..451162dbf1 --- /dev/null +++ b/antlr/antlr4/examples/PhpParser.g4 @@ -0,0 +1,928 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019-2020, Student Main for php7, php8 support. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +parser grammar PhpParser; + +options { tokenVocab=PhpLexer; } /* token types are taken from the PhpLexer grammar */ + +// HTML +// Also see here: https://github.com/antlr/grammars-v4/tree/master/html + +htmlDocument + : Shebang? (inlineHtml | phpBlock)* EOF + ; /* the only rule in this grammar that consumes EOF; inline HTML and PHP blocks may alternate freely */ + +inlineHtml + : htmlElement+ + | scriptText + ; + +// TODO: split into html, css and xml elements +htmlElement + : HtmlDtd + | HtmlClose + | HtmlStyleOpen + | HtmlOpen + | HtmlName + | HtmlSlashClose + | HtmlSlash + | HtmlText + | HtmlEquals + | HtmlStartQuoteString + | HtmlEndQuoteString + | HtmlStartDoubleQuoteString + | HtmlEndDoubleQuoteString + | HtmlHex + | HtmlDecimal + | HtmlQuoteString + | HtmlDoubleQuoteString + + | StyleBody + + | HtmlScriptOpen + | HtmlScriptClose + + | XmlStart XmlText* XmlClose + ; + +// Script +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. + +scriptText + : ScriptText+ + ; + +// PHP + +phpBlock + : importStatement* topStatement+ + ; + +importStatement + : Import Namespace namespaceNameList SemiColon + ; + +topStatement + : statement + | useDeclaration + | namespaceDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +useDeclaration + : Use (Function_ | Const)? useDeclarationContentList SemiColon + ; + +useDeclarationContentList + : '\\'? useDeclarationContent (',' '\\'? useDeclarationContent)* + ; + +useDeclarationContent + : namespaceNameList + ; + +namespaceDeclaration + : Namespace (namespaceNameList? 
OpenCurlyBracket namespaceStatement* CloseCurlyBracket | namespaceNameList SemiColon) + ; + +namespaceStatement + : statement + | useDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +functionDeclaration + : attributes? Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' (':' QuestionMark? typeHint)? blockStatement + ; + +classDeclaration + : attributes? Private? modifier? Partial? ( + classEntryType identifier typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? ) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +classEntryType + : Class + | Trait + ; + +interfaceList + : qualifiedStaticTypeRef (',' qualifiedStaticTypeRef)* + ; + +typeParameterListInBrackets + : '<:' typeParameterList ':>' + | '<:' typeParameterWithDefaultsList ':>' + | '<:' typeParameterList ',' typeParameterWithDefaultsList ':>' + ; + +typeParameterList + : typeParameterDecl (',' typeParameterDecl)* + ; + +typeParameterWithDefaultsList + : typeParameterWithDefaultDecl (',' typeParameterWithDefaultDecl)* + ; + +typeParameterDecl + : attributes? identifier + ; + +typeParameterWithDefaultDecl + : attributes? identifier Eq (qualifiedStaticTypeRef | primitiveType) + ; + +genericDynamicArgs + : '<:' typeRef (',' typeRef)* ':>' + ; + +attributes + : attributeGroup+ + ; + +attributeGroup + : AttributeStart (identifier ':')? attribute (',' attribute)* ']' + ; + +attribute + : qualifiedNamespaceName arguments? 
+ ; + +innerStatementList + : innerStatement* + ; + +innerStatement + : statement + | functionDeclaration + | classDeclaration + ; + +// Statements + +statement + : identifier ':' + | blockStatement + | ifStatement + | whileStatement + | doWhileStatement + | forStatement + | switchStatement + | breakStatement + | continueStatement + | returnStatement + | yieldExpression SemiColon + | globalStatement + | staticVariableStatement + | echoStatement + | expressionStatement + | unsetStatement + | foreachStatement + | tryCatchFinally + | throwStatement + | gotoStatement + | declareStatement + | emptyStatement_ + | inlineHtmlStatement + ; + +emptyStatement_ + : SemiColon + ; + +blockStatement + : OpenCurlyBracket innerStatementList CloseCurlyBracket + ; + +ifStatement + : If parentheses statement elseIfStatement* elseStatement? + | If parentheses ':' innerStatementList elseIfColonStatement* elseColonStatement? EndIf SemiColon + ; + +elseIfStatement + : ElseIf parentheses statement + ; + +elseIfColonStatement + : ElseIf parentheses ':' innerStatementList + ; + +elseStatement + : Else statement + ; + +elseColonStatement + : Else ':' innerStatementList + ; + +whileStatement + : While parentheses (statement | ':' innerStatementList EndWhile SemiColon) + ; + +doWhileStatement + : Do statement While parentheses SemiColon + ; + +forStatement + : For '(' forInit? SemiColon expressionList? SemiColon forUpdate? ')' (statement | ':' innerStatementList EndFor SemiColon ) + ; + +forInit + : expressionList + ; + +forUpdate + : expressionList + ; + +switchStatement + : Switch parentheses (OpenCurlyBracket SemiColon? switchBlock* CloseCurlyBracket | ':' SemiColon? switchBlock* EndSwitch SemiColon) + ; + +switchBlock + : ((Case expression | Default) (':' | SemiColon))+ innerStatementList + ; + +breakStatement + : Break expression? SemiColon + ; + +continueStatement + : Continue expression? SemiColon + ; + +returnStatement + : Return expression? 
SemiColon + ; + +expressionStatement + : expression SemiColon + ; + +unsetStatement + : Unset '(' chainList ')' SemiColon + ; + +foreachStatement + : Foreach + ( '(' chain As '&'? assignable ('=>' '&'? chain)? ')' + | '(' expression As assignable ('=>' '&'? chain)? ')' + | '(' chain As List '(' assignmentList ')' ')' ) + (statement | ':' innerStatementList EndForeach SemiColon) + ; + +tryCatchFinally + : Try blockStatement (catchClause+ finallyStatement? | catchClause* finallyStatement) + ; + +catchClause + : Catch '(' qualifiedStaticTypeRef ('|' qualifiedStaticTypeRef)* VarName ')' blockStatement + ; + +finallyStatement + : Finally blockStatement + ; + +throwStatement + : Throw expression SemiColon + ; + +gotoStatement + : Goto identifier SemiColon + ; + +declareStatement + : Declare '(' declareList ')' (statement | ':' innerStatementList EndDeclare SemiColon) + ; + +inlineHtmlStatement + : inlineHtml+ + ; + +declareList + : identifierInitializer (',' identifierInitializer)* + ; + +formalParameterList + : formalParameter? (',' formalParameter)* ','? + ; + +formalParameter + : attributes? memberModifier? QuestionMark? typeHint? '&'? '...'? variableInitializer + ; + +typeHint + : qualifiedStaticTypeRef + | Callable + | primitiveType + | typeHint '|' typeHint + ; /* directly left-recursive: the last alternative combines two typeHint operands around '|' */ + +globalStatement + : Global globalVar (',' globalVar)* SemiColon + ; + +globalVar + : VarName + | Dollar chain + | Dollar OpenCurlyBracket expression CloseCurlyBracket + ; + +echoStatement + : Echo expressionList SemiColon + ; + +staticVariableStatement + : Static variableInitializer (',' variableInitializer)* SemiColon + ; + +classStatement + : attributes? ( propertyModifiers typeHint? variableInitializer (',' variableInitializer)* SemiColon + | memberModifiers? ( Const typeHint? identifierInitializer (',' identifierInitializer)* SemiColon + | Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' + baseCtorCall? 
methodBody)) + | Use qualifiedNamespaceNameList traitAdaptations + ; + +traitAdaptations + : SemiColon + | OpenCurlyBracket traitAdaptationStatement* CloseCurlyBracket + ; + +traitAdaptationStatement + : traitPrecedence + | traitAlias + ; + +traitPrecedence + : qualifiedNamespaceName '::' identifier InsteadOf qualifiedNamespaceNameList SemiColon + ; + +traitAlias + : traitMethodReference As (memberModifier | memberModifier? identifier) SemiColon + ; + +traitMethodReference + : (qualifiedNamespaceName '::')? identifier + ; + +baseCtorCall + : ':' identifier arguments? + ; + +methodBody + : SemiColon + | blockStatement + ; + +propertyModifiers + : memberModifiers + | Var + ; + +memberModifiers + : memberModifier+ + ; + +variableInitializer + : VarName (Eq constantInitializer)? + ; + +identifierInitializer + : identifier Eq constantInitializer + ; + +globalConstantDeclaration + : attributes? Const identifierInitializer (',' identifierInitializer)* SemiColon + ; + +expressionList + : expression (',' expression)* + ; + +parentheses + : '(' (expression | yieldExpression) ')' + ; + +// Expressions +// Grouped by priorities: http://php.net/manual/en/language.operators.precedence.php +expression + : Clone expression #CloneExpression + | newExpr #NewExpression + + | stringConstant '[' expression ']' #IndexerExpression + + | '(' castOperation ')' expression #CastExpression + | ('~' | '@') expression #UnaryOperatorExpression + + | ('!' 
| '+' | '-') expression #UnaryOperatorExpression + + | ('++' | '--') chain #PrefixIncDecExpression + | chain ('++' | '--') #PostfixIncDecExpression + + | Print expression #PrintExpression + + | chain #ChainExpression + | constant #ScalarExpression + | string #ScalarExpression + | Label #ScalarExpression + + | BackQuoteString #BackQuoteStringExpression + | parentheses #ParenthesisExpression + | arrayCreation #ArrayCreationExpression + + | Yield #SpecialWordExpression + | List '(' assignmentList ')' Eq expression #SpecialWordExpression + | IsSet '(' chainList ')' #SpecialWordExpression + | Empty '(' chain ')' #SpecialWordExpression + | Eval '(' expression ')' #SpecialWordExpression + | Exit ( '(' ')' | parentheses )? #SpecialWordExpression + | (Include | IncludeOnce) expression #SpecialWordExpression + | (Require | RequireOnce) expression #SpecialWordExpression + + | lambdaFunctionExpr #LambdaFunctionExpression + | matchExpr #MatchExpression + + | expression op='**' expression #ArithmeticExpression + | expression InstanceOf typeRef #InstanceOfExpression + | expression op=('*' | Divide | '%') expression #ArithmeticExpression + + | expression op=('+' | '-' | '.') expression #ArithmeticExpression + + | expression op=('<<' | '>>') expression #ComparisonExpression + | expression op=(Less | '<=' | Greater | '>=') expression #ComparisonExpression + | expression op=('===' | '!==' | '==' | IsNotEq) expression #ComparisonExpression + + | expression op='&' expression #BitwiseExpression + | expression op='^' expression #BitwiseExpression + | expression op='|' expression #BitwiseExpression + | expression op='&&' expression #BitwiseExpression + | expression op='||' expression #BitwiseExpression /* NOTE(review): '&&' and '||' carry the same #BitwiseExpression label as '&', '^' and '|' - confirm this is intended */ + + | expression op=QuestionMark expression? ':' expression #ConditionalExpression + | expression op='??' 
expression #NullCoalescingExpression + | expression op='<=>' expression #SpaceshipExpression + + | Throw expression #SpecialWordExpression + + | assignable assignmentOperator attributes? expression #AssignmentExpression + | assignable Eq attributes? '&' (chain | newExpr) #AssignmentExpression + + | expression op=LogicalAnd expression #LogicalExpression + | expression op=LogicalXor expression #LogicalExpression + | expression op=LogicalOr expression #LogicalExpression + ; + +assignable + : chain + | arrayCreation + ; + +arrayCreation + : (Array '(' arrayItemList? ')' | '[' arrayItemList? ']') ('[' expression ']')? + ; + +lambdaFunctionExpr + : Static? Function_ '&'? '(' formalParameterList ')' lambdaFunctionUseVars? (':' typeHint)? blockStatement + | LambdaFn '(' formalParameterList')' '=>' expression + ; + +matchExpr + : Match_ '(' expression ')' OpenCurlyBracket matchItem (',' matchItem)* ','? CloseCurlyBracket + ; + +matchItem + : expression (',' expression)* '=>' expression + ; + +newExpr + : New typeRef arguments? + ; + +assignmentOperator + : Eq + | '+=' + | '-=' + | '*=' + | '**=' + | '/=' + | '.=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '??=' + ; + +yieldExpression + : Yield (expression ('=>' expression)? | From expression) + ; + +arrayItemList + : arrayItem (',' arrayItem)* ','? + ; + +arrayItem + : expression ('=>' expression)? + | (expression '=>')? '&' chain + ; + +lambdaFunctionUseVars + : Use '(' lambdaFunctionUseVar (',' lambdaFunctionUseVar)* ')' + ; + +lambdaFunctionUseVar + : '&'? VarName + ; + +qualifiedStaticTypeRef + : qualifiedNamespaceName genericDynamicArgs? + | Static + ; + +typeRef + : (qualifiedNamespaceName | indirectTypeRef) genericDynamicArgs? + | primitiveType + | Static + | anonymousClass + ; + +anonymousClass + : attributes? Private? modifier? Partial? ( + classEntryType typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? 
(Extends interfaceList)? ) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +indirectTypeRef + : chainBase ('->' keyedFieldName)* + ; + +qualifiedNamespaceName + : Namespace? '\\'? namespaceNameList + ; + +namespaceNameList + : identifier + | identifier ('\\' identifier)* ('\\' namespaceNameTail)? + ; + +namespaceNameTail + : identifier (As identifier)? + | OpenCurlyBracket namespaceNameTail (','namespaceNameTail)* ','? CloseCurlyBracket + ; + +qualifiedNamespaceNameList + : qualifiedNamespaceName (',' qualifiedNamespaceName)* + ; + +arguments + : '(' ( actualArgument (',' actualArgument)* | yieldExpression)? ','? ')' + ; + +actualArgument + : argumentName? '...'? expression + | '&' chain + ; + +argumentName + : identifier ':' + ; + +constantInitializer + : constant + | string + | Array '(' (arrayItemList ','?)? ')' + | '[' (arrayItemList ','?)? ']' + | ('+' | '-') constantInitializer + ; + +constant + : Null + | literalConstant + | magicConstant + | classConstant + | qualifiedNamespaceName + ; + +literalConstant + : Real + | BooleanConstant + | numericConstant + | stringConstant + ; + +numericConstant + : Octal + | Decimal + | Hex + | Binary + ; + +classConstant + : (Class | Parent_) '::' (identifier | Constructor | Get | Set) + | (qualifiedStaticTypeRef | keyedVariable | string) '::' (identifier | keyedVariable) // 'foo'::$bar works in php7 + ; + +stringConstant + : Label + ; + +string + : StartHereDoc HereDocText+ + | StartNowDoc HereDocText+ + | SingleQuoteString + | DoubleQuote interpolatedStringPart* DoubleQuote + ; + +interpolatedStringPart + : StringPart + | UnicodeEscape + | chain + ; + +chainList + : chain (',' chain)* + ; + +chain + : chainOrigin memberAccess* + //| arrayCreation // [$a,$b]=$c + ; + +chainOrigin + : chainBase + | functionCall + | '(' newExpr ')' + ; + +memberAccess + : '->' keyedFieldName actualArguments? 
+ ; + +functionCall + : functionCallName actualArguments + ; + +functionCallName + : qualifiedNamespaceName + | classConstant + | chainBase + | parentheses + ; + +actualArguments + : genericDynamicArgs? arguments squareCurlyExpression* + ; + +chainBase + : keyedVariable ('::' keyedVariable)? + | qualifiedStaticTypeRef '::' keyedVariable + ; + +keyedFieldName + : keyedSimpleFieldName + | keyedVariable + ; + +keyedSimpleFieldName + : (identifier | OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +keyedVariable + : Dollar* (VarName | Dollar OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +squareCurlyExpression + : '[' expression? ']' + | OpenCurlyBracket expression CloseCurlyBracket + ; + +assignmentList + : assignmentListElement? (',' assignmentListElement?)* + ; + +assignmentListElement + : chain + | List '(' assignmentList ')' + | arrayItem + ; + +modifier + : Abstract + | Final + ; + +identifier + : Label + + | Abstract + | Array + | As + | BinaryCast + | BoolType + | BooleanConstant + | Break + | Callable + | Case + | Catch + | Class + | Clone + | Const + | Continue + | Declare + | Default + | Do + | DoubleCast + | DoubleType + | Echo + | Else + | ElseIf + | Empty + | EndDeclare + | EndFor + | EndForeach + | EndIf + | EndSwitch + | EndWhile + | Eval + | Exit + | Extends + | Final + | Finally + | FloatCast + | For + | Foreach + | Function_ + | Global + | Goto + | If + | Implements + | Import + | Include + | IncludeOnce + | InstanceOf + | InsteadOf + | Int16Cast + | Int64Type + | Int8Cast + | Interface + | IntType + | IsSet + | List + | LogicalAnd + | LogicalOr + | LogicalXor + | Namespace + | New + | Null + | ObjectType + | Parent_ + | Partial + | Print + | Private + | Protected + | Public + | Require + | RequireOnce + | Resource + | Return + | Static + | StringType + | Switch + | Throw + | Trait + | Try + | Typeof + | UintCast + | UnicodeCast + | Unset + | Use + | Var + | While + | Yield + | From + + | Get 
+ | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + | Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +memberModifier + : Public + | Protected + | Private + | Static + | Abstract + | Final + ; + +magicConstant + : Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +magicMethod + : Get + | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + ; /* NOTE(review): no other rule in this grammar refers to magicMethod; the same tokens are listed directly in identifier */ + +primitiveType + : BoolType + | IntType + | Int64Type + | DoubleType + | StringType + | Resource + | ObjectType + | Array + ; + +castOperation + : BoolType + | Int8Cast + | Int16Cast + | IntType + | Int64Type + | UintCast + | DoubleCast + | DoubleType + | FloatCast + | StringType + | BinaryCast + | UnicodeCast + | Array + | ObjectType + | Resource + | Unset + ;