Skip to content

Commit

Permalink
Port php grammar to Antlr4ng.
Browse files Browse the repository at this point in the history
  • Loading branch information
kaby76 committed Mar 31, 2024
1 parent c7883a5 commit 94b669c
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 3 deletions.
153 changes: 153 additions & 0 deletions php/Antlr4ng/PhpLexerBase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import {CommonToken, Lexer, Token, CharStream} from "antlr4ng";
import { PhpParser } from "./PhpParser.js";
import { PhpLexer } from "./PhpLexer.js";

/**
 * Base class for the generated PHP lexer.
 *
 * It post-processes tokens produced by the generated lexer (`nextToken`) and
 * provides the helper methods that the grammar's embedded actions and
 * predicates call (`PushModeOnHtmlClose`, `IsNewLineOrStart`, ...).
 */
export default abstract class PhpLexerBase extends Lexer {
    private AspTags: boolean;
    // Set by the grammar actions on `<script` / `<style` and consumed by PushModeOnHtmlClose().
    protected _scriptTag: boolean;
    protected _styleTag: boolean;
    // Label of the heredoc/nowdoc currently being lexed (without quotes).
    private _heredocIdentifier: string | undefined;
    // Type of the most recent token seen in PHP mode that was not on the HIDDEN channel.
    private _prevTokenType: number;
    // Text of the most recent HtmlName token (used to detect language="php").
    private _htmlNameText: string | undefined;
    private _phpScript: boolean;
    private _insideString: boolean;
    // NOTE(review): this class only ever reads `_mode` and `_channel`, so their
    // values must be maintained by the lexer runtime; confirm that re-declaring
    // them here does not shadow/reset the runtime's own fields.
    protected _mode: number
    protected _channel: number

    protected static MIN_CHAR_VALUE = 0x0000;
    protected static MAX_CHAR_VALUE = 0x10FFFF;

    constructor(input: CharStream) {
        super(input);
        this.AspTags = true;
        this._scriptTag = false;
        this._styleTag = false;
        this._heredocIdentifier = undefined;
        this._prevTokenType = 0;
        this._htmlNameText = undefined;
        this._phpScript = false;
        this._insideString = false;
    }

    /**
     * Fetches the next token from the generated lexer and adjusts it:
     *  - a PHP end tag pops the PHP mode and is turned into `;`, into
     *    HtmlScriptClose, or moved off the default channel;
     *  - HtmlName / HtmlDoubleQuoteString tokens are tracked to detect
     *    `language="php"`;
     *  - the heredoc/nowdoc terminator line is turned into the end of the string.
     *
     * @returns the (possibly rewritten) token.
     */
    nextToken() {
        let token = super.nextToken()

        if (token.type === PhpParser.PHPEnd || token.type === PhpLexer.PHPEndSingleLineComment) {
            if (this._mode === PhpLexer.SingleLineCommentMode) {
                // SingleLineCommentMode for such allowed syntax:
                // // <?php echo "Hello world"; // comment ?>
                this.popMode();
            }
            this.popMode();

            if (token.text === "</script>") {
                this._phpScript = false;
                token.type = PhpLexer.HtmlScriptClose;
            } else {
                // Add semicolon to the end of statement if it is absent.
                // For example: <?php echo "Hello world" ?>
                const statementAlreadyClosed =
                    this._prevTokenType === PhpLexer.SemiColon ||
                    this._prevTokenType === PhpLexer.Colon ||
                    this._prevTokenType === PhpLexer.OpenCurlyBracket ||
                    this._prevTokenType === PhpLexer.CloseCurlyBracket;
                if (statementAlreadyClosed) {
                    // NOTE(review): the grammar's own rules route skipped tokens to a
                    // channel named `SkipChannel`; confirm PhpLexer.SKIP is the intended value here.
                    token.channel = PhpLexer.SKIP;
                } else {
                    token.type = PhpLexer.SemiColon;
                    token.text = ';';
                }
            }
        }

        else if (token.type === PhpLexer.HtmlName) {
            this._htmlNameText = token.text
        }

        else if (token.type === PhpLexer.HtmlDoubleQuoteString) {
            if (token.text === "php" && this._htmlNameText === "language") {
                this._phpScript = true;
            }
        }

        else if (this._mode === PhpLexer.HereDoc) {
            // Heredoc and Nowdoc syntax support: http://php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc
            if (token.type === PhpLexer.StartHereDoc || token.type === PhpLexer.StartNowDoc) {
                // Drop the leading `<<<`, surrounding whitespace and BOTH quotes of a
                // nowdoc label (<<<'EOT'); stripping only the trailing quote would
                // leave `'EOT`, which can never match the terminator line.
                this._heredocIdentifier = token.text.slice(3).trim().replace(/^'|'$/g, '');
            }

            if (token.type === PhpLexer.HereDocText) {
                if (this.CheckHeredocEnd(token.text)) {
                    this.popMode()
                    const heredocIdentifier = this.GetHeredocEnd(token.text)
                    if (token.text.trim().endsWith(';')) {
                        token.text = `${heredocIdentifier};\n`;
                        token.type = PhpLexer.SemiColon;
                    } else {
                        // Terminator without `;`: replace it with the following token.
                        token = super.nextToken()
                        token.text = `${heredocIdentifier}\n;`;
                    }
                }
            }
        }

        else if (this._mode === PhpLexer.PHP) {
            // Remember the last token that is NOT on the hidden channel, so the
            // PHP-end handling above can tell whether the statement was already closed.
            if (this._channel !== PhpLexer.HIDDEN) {
                this._prevTokenType = token.type;
            }
        }

        return token;
    }

    /**
     * Returns `text` trimmed, with one trailing `;` removed.
     */
    GetHeredocEnd(text: string): string {
        return text.trim().replace(/\;$/, "");
    }

    /**
     * Tells whether `text` is the terminator line of the current heredoc/nowdoc,
     * i.e. equals the stored label once trimmed and stripped of a trailing `;`.
     */
    CheckHeredocEnd(text: string): boolean {
        return this.GetHeredocEnd(text) === this._heredocIdentifier;
    }

    /**
     * True when the look-ahead symbol at `pos` is `\r`, `\n`, or a value <= 0
     * (no real character at that position).
     */
    IsNewLineOrStart(pos: number): boolean {
        const symbol = this.inputStream.LA(pos);
        return symbol <= 0 || symbol === '\r'.charCodeAt(0) || symbol === '\n'.charCodeAt(0);
    }

    /**
     * Leaves the current mode and, when the tag just closed was `<script` or
     * `<style`, enters the mode for its content (SCRIPT, PHP or STYLE).
     */
    PushModeOnHtmlClose() {
        this.popMode();
        if (this._scriptTag) {
            // <script language="php"> content is lexed as PHP, any other script as SCRIPT.
            this.pushMode(this._phpScript ? PhpLexer.PHP : PhpLexer.SCRIPT);
            this._scriptTag = false;
        } else if (this._styleTag) {
            this.pushMode(PhpLexer.STYLE);
            this._styleTag = false;
        }
    }

    /** Returns the value of the AspTags flag (set to `true` in the constructor). */
    HasAspTags(): boolean {
        return this.AspTags;
    }

    /** True after a `language="php"` attribute was seen and until `</script>` ends the PHP block. */
    HasPhpScriptTag(): boolean {
        return this._phpScript;
    }

    /**
     * When the inside-string flag is set (see SetInsideString), clears it, skips
     * the current token and pops the current mode. Does nothing otherwise.
     */
    PopModeOnCurlyBracketClose() {
        if (this._insideString) {
            this._insideString = false;
            // Must be a call: a bare `this.skip` is a no-op expression.
            this.skip();
            this.popMode();
        }
    }

    /** True when the look-ahead symbol at `pos` is `\r` or `\n`. */
    ShouldPushHereDocMode(pos: number): boolean {
        const symbol = this.inputStream.LA(pos);
        return symbol === '\r'.charCodeAt(0) || symbol === '\n'.charCodeAt(0);
    }

    /** True when the look-ahead symbol at `pos` is `$`. */
    IsCurlyDollar(pos: number): boolean {
        return this.inputStream.LA(pos) === '$'.charCodeAt(0);
    }

    /** Sets the inside-string flag that PopModeOnCurlyBracketClose() consumes. */
    SetInsideString() {
        this._insideString = true
    }
}
31 changes: 31 additions & 0 deletions php/Antlr4ng/transformGrammar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""The script transforms the grammar to fit the Antlr4ng (TypeScript) target."""
import sys
import re
import shutil
from glob import glob
from pathlib import Path

def main():
    """Transform every ``.g4`` grammar file found in the current directory."""
    grammar_paths = glob("./*.g4")
    for grammar_path in grammar_paths:
        transform_grammar(grammar_path)

def transform_grammar(file_path):
    """Rewrite one grammar file in place for the Antlr4ng target.

    The original file is first moved to ``file_path + ".bak"``. A new file is
    then written at ``file_path`` in which the marker comment
    ``// Insert here @header for C++ lexer.`` is replaced by an ``@header``
    block that imports ``PhpLexerBase``. All other text is copied unchanged.

    Args:
        file_path: Path (string) of the ``.g4`` file to transform.

    Raises:
        SystemExit: With exit code 1 when ``file_path`` is not an existing file.
    """
    print("Altering " + file_path)
    # is_file has to be *called*: the bare bound method is always truthy, which
    # would make this guard unreachable and let shutil.move fail instead.
    if not Path(file_path).is_file():
        print(f"Could not find file: {file_path}")
        sys.exit(1)

    backup_path = file_path + ".bak"
    shutil.move(file_path, backup_path)
    with open(backup_path, 'r', encoding="utf-8") as input_file, \
            open(file_path, 'w', encoding="utf-8") as output_file:
        for line in input_file:
            line = re.sub(r"(\/\/ Insert here @header for C\+\+ lexer\.)",
                          '@header {import PhpLexerBase from "./PhpLexerBase.js"}', line)
            output_file.write(line)

    print("Writing ...")

# Run the transformation only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
4 changes: 2 additions & 2 deletions php/PhpLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ HtmlText : ~[<#]+;
XmlStart : '<?xml' -> pushMode(XML);
PHPStartEcho : PhpStartEchoFragment -> type(Echo), pushMode(PHP);
PHPStart : PhpStartFragment -> channel(SkipChannel), pushMode(PHP);
HtmlScriptOpen : '<script' { _scriptTag = true; } -> pushMode(INSIDE);
HtmlStyleOpen : '<style' { _styleTag = true; } -> pushMode(INSIDE);
HtmlScriptOpen : '<script' { this._scriptTag = true; } -> pushMode(INSIDE);
HtmlStyleOpen : '<style' { this._styleTag = true; } -> pushMode(INSIDE);
HtmlComment : '<!' '--' .*? '-->' -> channel(HIDDEN);
HtmlDtd : '<!' .*? '>';
HtmlOpen : '<' -> pushMode(INSIDE);
Expand Down
2 changes: 1 addition & 1 deletion php/desc.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" ?>
<desc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../_scripts/desc.xsd">
<antlr-version>^4.10</antlr-version>
<targets>CSharp;Java;Python3</targets>
<targets>CSharp;Java;Python3;Antlr4ng</targets>
</desc>

0 comments on commit 94b669c

Please sign in to comment.