diff --git a/.gitignore b/.gitignore index 5c99ef58d6..bb845c8925 100644 --- a/.gitignore +++ b/.gitignore @@ -1,98 +1,10 @@ -# Maven build folders -target/ -# ... but not code generation targets -!tool/src/org/antlr/v4/codegen/target/ - -# Node.js (npm and typings) cached dependencies -node_modules/ -typings/ - -# Ant build folders -build/ -dist/ -lib/ -user.build.properties - -# MacOSX files -.DS_Store - -## Python, selected lines from https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -## CSharp and VisualStudio, selected lines from https://raw.githubusercontent.com/github/gitignore/master/VisualStudio.gitignore -# User-specific files -*.suo -*.user -*.userosscache -*.sln.docstates - -# User-specific files (MonoDevelop/Xamarin Studio) -*.userprefs -*.user -.vs/ -project.lock.json - -# Build results -[Dd]ebug/ -[Dd]ebugPublic/ -[Rr]elease/ -[Rr]eleases/ -x64/ -x86/ -bld/ -[Bb]in/ -[Oo]bj/ -[Ll]og/ - -# Visual Studio 2015 cache/options directory -.vs/ - -# NetBeans user configuration files -nbactions*.xml -/nbproject/private/ -*/nbproject/private/ - -# IntelliJ projects -*.iws -*.iml -.idea/ - -# Eclipse projects -.classpath -.project -.settings/ -.metadata - -# Profiler results -*.hprof - -# parrt's bash prompt data -.fetch_time_cache - -# Playground -#/tool/playground/ - -# Generated files -/out/ -/gen/ -/gen3/ -/gen4/ -/tool/playground/ -tmp/ - -# Configurable build files -bilder.py -bilder.pyc -bild.log - -bild_output.txt -runtime/Cpp/demo/generated -xcuserdata -*.jar +.idea .vscode +/target +/tests/gen/*.tokens +/tests/gen/*.interp +**/*.rs.bk +Cargo.lock # VSCode Java plugin temporary files javac-services.0.log diff --git a/.gitmodules b/.gitmodules index e69de29bb2..d79ba96f9a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "runtime/PHP"] + path = runtime/PHP + url = https://github.com/antlr/antlr-php-runtime.git +[submodule 
"runtime/Rust"] + path = runtime/Rust + url = https://github.com/nmeylan/antlr4rust \ No newline at end of file diff --git a/.travis/before-install-linux-rust.sh b/.travis/before-install-linux-rust.sh new file mode 100755 index 0000000000..0789550897 --- /dev/null +++ b/.travis/before-install-linux-rust.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -euo pipefail + +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --default-toolchain nightly-2020-12-23 -y +export PATH=$HOME/.cargo/bin:$PATH +( rustc --version ; cargo --version ) || true \ No newline at end of file diff --git a/.travis/run-tests-rust.sh b/.travis/run-tests-rust.sh new file mode 100755 index 0000000000..10e65fbc65 --- /dev/null +++ b/.travis/run-tests-rust.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -euo pipefail + +export PATH=$HOME/.cargo/bin:$PATH +mvn test -Dtest=rust.*Left* -q diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000000..cf8ca60fc8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "antlr-rust" +version = "0.2.0-dev.2" +authors = ["Konstantin Anisimov "] +homepage = "https://github.com/rrevenantt/antlr4rust" +repository = "https://github.com/rrevenantt/antlr4rust" +documentation = "https://docs.rs/antlr-rust" +description = "ANTLR4 runtime for Rust" +readme = "README.md" +edition = "2018" +license = "BSD-3-Clause" +keywords = ["ANTLR","ANTLR4","parsing","runtime"] +categories = ["parsing"] +exclude = ["build.rs"] + +[dependencies] +lazy_static = "^1.4" +uuid = "=0.8.*" +byteorder = "^1" +murmur3 = "=0.4" # 0.5 is incompatible currently +bit-set = "=0.5.*" +once_cell = "^1.2" +#backtrace = "=0.3" +typed-arena = "^2.0" +better_any = "=0.1" + +[lib] + +#[[test]] +#name = "my_test" +#path="tests/my_test.rs" + + +[profile.release] +#opt-level = 3 +#debug = true \ No newline at end of file diff --git a/README.md b/README.md index 6d659773ce..14245c97e8 100644 --- a/README.md +++ b/README.md @@ -1,64 +1,114 @@ -# ANTLR v4 - -[![Java 
7+](https://img.shields.io/badge/java-7+-4c7e9f.svg)](http://java.oracle.com) -[![License](https://img.shields.io/badge/license-BSD-blue.svg)](https://raw.githubusercontent.com/antlr/antlr4/master/LICENSE.txt) - -**Build status** - -[![Github CI Build Status (MacOSX)](https://img.shields.io/github/workflow/status/antlr/antlr4/MacOSX?label=MacOSX)](https://github.com/antlr/antlr4/actions) -[![AppVeyor CI Build Status (Windows)](https://img.shields.io/appveyor/build/parrt/antlr4?label=Windows)](https://ci.appveyor.com/project/parrt/antlr4) -[![Circle CI Build Status (Linux)](https://img.shields.io/circleci/build/gh/antlr/antlr4/master?label=Linux)](https://app.circleci.com/pipelines/github/antlr/antlr4) -[![Travis-CI Build Status (Swift-Linux)](https://img.shields.io/travis/antlr/antlr4.svg?label=Linux-Swift&branch=master)](https://travis-ci.com/github/antlr/antlr4) - -**ANTLR** (ANother Tool for Language Recognition) is a powerful parser generator for reading, processing, executing, or translating structured text or binary files. It's widely used to build languages, tools, and frameworks. From a grammar, ANTLR generates a parser that can build parse trees and also generates a listener interface (or visitor) that makes it easy to respond to the recognition of phrases of interest. - -*Given day-job constraints, my time working on this project is limited so I'll have to focus first on fixing bugs rather than changing/improving the feature set. Likely I'll do it in bursts every few months. Please do not be offended if your bug or pull request does not yield a response! 
--parrt* - -[![Donate](https://www.paypal.com/en_US/i/btn/x-click-butcc-donate.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=BF92STRXT8F8Q) - -## Authors and major contributors - -* [Terence Parr](http://www.cs.usfca.edu/~parrt/), parrt@cs.usfca.edu -ANTLR project lead and supreme dictator for life -[University of San Francisco](http://www.usfca.edu/) -* [Sam Harwell](http://tunnelvisionlabs.com/) (Tool co-author, Java and original C# target) -* [Eric Vergnaud](https://github.com/ericvergnaud) (Javascript, Python2, Python3 targets and maintenance of C# target) -* [Peter Boyer](https://github.com/pboyer) (Go target) -* [Mike Lischke](http://www.soft-gems.net/) (C++ completed target) -* Dan McLaughlin (C++ initial target) -* David Sisson (C++ initial target and test) -* [Janyou](https://github.com/janyou) (Swift target) -* [Ewan Mellor](https://github.com/ewanmellor), [Hanzhou Shi](https://github.com/hanjoes) (Swift target merging) -* [Ben Hamilton](https://github.com/bhamiltoncx) (Full Unicode support in serialized ATN and all languages' runtimes for code points > U+FFFF) -* [Marcos Passos](https://github.com/marcospassos) (PHP target) -* [Lingyu Li](https://github.com/lingyv-li) (Dart target) - -## Useful information - -* [Release notes](https://github.com/antlr/antlr4/releases) -* [Getting started with v4](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md) -* [Official site](http://www.antlr.org/) -* [Documentation](https://github.com/antlr/antlr4/blob/master/doc/index.md) -* [FAQ](https://github.com/antlr/antlr4/blob/master/doc/faq/index.md) -* [ANTLR code generation targets](https://github.com/antlr/antlr4/blob/master/doc/targets.md)
(Currently: Java, C#, Python2|3, JavaScript, Go, C++, Swift, Dart, PHP) -* [Java API](http://www.antlr.org/api/Java/index.html) -* [ANTLR v3](http://www.antlr3.org/) -* [v3 to v4 Migration, differences](https://github.com/antlr/antlr4/blob/master/doc/faq/general.md) - -You might also find the following pages useful, particularly if you want to mess around with the various target languages. +# antlr4rust +[![Crate](https://flat.badgen.net/crates/v/antlr-rust)](https://crates.io/crates/antlr_rust) +[![docs](https://flat.badgen.net/badge/docs.rs/v0.2.0-dev.2)](https://docs.rs/antlr-rust/0.2.0-dev.2) + +[ANTLR4](https://github.com/antlr/antlr4) runtime for Rust programming language. + +Tool(generator) part is currently located in rust-target branch of my antlr4 fork [rrevenantt/antlr4/tree/rust-target](https://github.com/rrevenantt/antlr4/tree/rust-target) +Latest version is automatically built to [releases](https://github.com/rrevenantt/antlr4rust/releases) on this repository. +Also you can checkout it and `mvn -DskipTests install` + +For examples you can see [grammars](grammars), [tests/gen](tests/gen) for corresponding generated code +and [tests/my_tests.rs](tests/my_test.rs) for actual usage examples + +### Implementation status + +For now development is going on in this repository +but eventually it will be merged to main ANTLR4 repo + +Currently, requires nightly version of rust. +This likely will be the case until `coerce_unsize` or some kind of coercion trait is stabilized. +There are other unstable features in use but only `CoerceUnsized` is essential. + +Remaining things before merge: + - API stabilization + - [ ] Rust api guidelines compliance + - [ ] more tests for API because it is quite different from Java + +Can be done after merge: + - more profiling and performance optimizations + - Documentation + - [ ] Some things are already documented but still far from perfect, also more links needed. 
+ - Code quality + - [ ] Clippy sanitation + - [ ] Not all warnings are fixed + - cfg to not build potentially unnecessary parts + (no Lexer if custom token stream, no ParserATNSimulator if LL(1) grammar) + - run rustfmt on generated parser +###### Long term improvements + - generate enum for labeled alternatives without redundant `Error` option + - option to generate fields instead of getters by default and make visiting based on fields + - make tree generic over pointer type and allow tree nodes to be allocated in an arena. + (requires GAT, otherwise it would be a problem for users that want ownership for parse tree) + - support stable rust + - support no_std(although alloc would still be required) + +### Usage + +You should use the ANTLR4 "tool" to generate a parser, that will use the ANTLR +runtime, located here. You can run it with the following command: +```bash +java -jar antlr4-4.8-2-SNAPSHOT-complete.jar -Dlanguage=Rust MyGrammar.g4 +``` +For a full list of antlr4 tool options, please visit the +[tool documentation page](https://github.com/antlr/antlr4/blob/master/doc/tool-options.md). + +You can also see [build.rs](build.rs) as an example of `build.rs` configuration +to rebuild parser automatically if grammar file was changed. + +Then add the following to `Cargo.toml` of the crate from which generated parser +is going to be used: +```toml +[dependencies] +antlr-rust = "=0.2.0-dev.2" +``` +and `#![feature(try_blocks)]` in your project root module. -* [How to build ANTLR itself](https://github.com/antlr/antlr4/blob/master/doc/building-antlr.md) -* [How we create and deploy an ANTLR release](https://github.com/antlr/antlr4/blob/master/doc/releasing-antlr.md) - -## The Definitive ANTLR 4 Reference - -Programmers run into parsing problems all the time. Whether it’s a data format like JSON, a network protocol like SMTP, a server configuration file for Apache, a PostScript/PDF file, or a simple spreadsheet macro language—ANTLR v4 and this book will demystify the process. 
ANTLR v4 has been rewritten from scratch to make it easier than ever to build parsers and the language applications built on top. This completely rewritten new edition of the bestselling Definitive ANTLR Reference shows you how to take advantage of these new features. - -You can buy the book [The Definitive ANTLR 4 Reference](http://amzn.com/1934356999) at amazon or an [electronic version at the publisher's site](https://pragprog.com/book/tpantlr2/the-definitive-antlr-4-reference). +### Parse Tree structure + +It is possible to generate idiomatic Rust syntax trees. For this you would need to use labels feature of ANTLR tool. +You can see [Labels](grammars/Labels.g4) grammar for example. +Consider following rule : +```text +e : a=e op='*' b=e # mult + | left=e '+' b=e # add + +``` +For such rule ANTLR will generate enum `EContextAll` containing `mult` and `add` alternatives, +so you will be able to match on them in your code. +Also corresponding struct for each alternative will contain fields you labeled. +I.e. for `MultContext` struct will contain `a` and `b` fields containing child subtrees and +`op` field with `TerminalNode` type which corresponds to individual `Token`. +It also is possible to disable generic parse tree creation to keep only selected children via +`parser.build_parse_trees = false`, but unfortunately currently it will prevent visitors from working. + +### Differences with Java +Although Rust runtime API has been made as close as possible to Java, +there are quite some differences because Rust is not an OOP language and is much more explicit. + + - If you are using labeled alternatives, + struct generated for the rule is an enum with variant for each alternative + - Parser needs to have ownership for listeners, but it is possible to get listener back via `ListenerId` + otherwise `ParseTreeWalker` should be used. + - In embedded actions to access parser you should use `recog` variable instead of `self`/`this`. 
+ This is because predicates have to be inserted into two syntactically different places in generated parser + and in one of them it is impossible to have parser as `self`. + - str based `InputStream` has different index behavior when there are unicode characters. + If you need exactly the same behavior, use `[u32]` based `InputStream`, or implement custom `CharStream`. + - In actions you have to escape `'` in rust lifetimes with `\ ` because ANTLR considers them as strings, e.g. `Struct<\'lifetime>` + - To make custom tokens you should use `@tokenfactory` custom action, instead of usual `TokenLabelType` parser option. + ANTLR parser options can accept only single identifiers while Rust target needs to know about lifetimes as well. + Also in Rust target `TokenFactory` is the way to specify token type. As an example you can see [CSV](grammars/CSV.g4) test grammar. + - All rule context variables (rule argument or rule return) should implement `Default + Clone`. + +### Unsafe +Currently, unsafe is used only for downcasting (through separate crate) +and to update data inside Rc via `get_mut_unchecked`(returned mutable reference is used immediately and not stored anywhere) + -You will find the [Book source code](http://pragprog.com/titles/tpantlr2/source_code) useful. +### Versioning +In addition to usual Rust semantic versioning, +patch version changes of the crate should not require updating of generator part + +## Licence -## Additional grammars -[This repository](https://github.com/antlr/grammars-v4) is a collection of grammars without actions where the -root directory name is the all-lowercase name of the language parsed -by the grammar. For example, java, cpp, csharp, c, etc... 
+BSD 3-clause
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000000..60efa3af8a
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,66 @@
+use std::convert::TryInto;
+use std::env;
+use std::env::VarError;
+use std::error::Error;
+use std::fs::{read_dir, DirEntry, File};
+use std::io::Write;
+use std::path::Path;
+use std::process::Command;
+
+/// Build script: regenerates the parsers in `tests/gen` from the grammars in `grammars/`.
+fn main() {
+    // Each grammar is paired with its extra tool argument in one list, so that no grammar
+    // can be left without an entry and silently skipped.
+    let grammars = vec![
+        ("CSV", Some("-visitor")),
+        ("ReferenceToATN", None),
+        ("XMLLexer", None),
+        ("SimpleLR", None),
+        ("Labels", None),
+        ("FHIRPath", None),
+    ];
+    // NOTE(review): absolute path on the author's machine; presumably generation simply
+    // fails (and is ignored below) everywhere else - confirm this is intended.
+    let antlr_path = "/home/rrevenantt/dev/antlr4/tool/target/antlr4-4.8-2-SNAPSHOT-complete.jar";
+
+    for (grammar, arg) in grammars {
+        //ignoring error because we do not need to run anything when deploying to crates.io
+        let _ = gen_for_grammar(grammar, antlr_path, arg);
+    }
+
+    println!("cargo:rerun-if-changed=build.rs");
+    // Reuse `antlr_path` so the watched file and the executed jar cannot drift apart.
+    println!("cargo:rerun-if-changed={}", antlr_path);
+}
+
+/// Runs the ANTLR tool on `grammars/<grammar_file_name>.g4`, writing the generated Rust
+/// sources to `tests/gen`, then asks cargo to rerun this script when that grammar changes.
+///
+/// Panics if `java` cannot be started; returns `Err` if waiting for its output fails.
+/// The tool's exit status is not inspected.
+fn gen_for_grammar(
+    grammar_file_name: &str,
+    antlr_path: &str,
+    additional_arg: Option<&str>,
+) -> Result<(), Box<dyn Error>> {
+    // The tool is run from `grammars/`, so the output directory below is relative to it.
+    let input = env::current_dir().unwrap().join("grammars");
+    let file_name = grammar_file_name.to_owned() + ".g4";
+
+    let _output = Command::new("java")
+        .current_dir(input)
+        .arg("-cp")
+        .arg(antlr_path)
+        .arg("org.antlr.v4.Tool")
+        .arg("-Dlanguage=Rust")
+        .arg("-o")
+        .arg("../tests/gen")
+        .arg(&file_name)
+        .args(additional_arg)
+        .spawn()
+        .expect("antlr tool failed to start")
+        .wait_with_output()?;
+
+    println!("cargo:rerun-if-changed=grammars/{}", file_name);
+    Ok(())
+}
diff --git a/doc/rust-target.md b/doc/rust-target.md
new file mode 100644
index 0000000000..1e7ed5a0a8
--- /dev/null
+++ b/doc/rust-target.md
@@ -0,0 +1,44 @@
+# ANTLR4 Runtime for Rust
+
+### First steps + +#### 1. Install ANTLR4 + +[The getting started guide](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md) +should get you started. + +#### 2. Install the Rust ANTLR runtime + +Each target language for ANTLR has a runtime package for running parser +generated by ANTLR4. The runtime provides a common set of tools for using your parser. + +Add antlr-rust and lazy_static dependencies to your `Cargo.toml`: + +```toml +[dependencies] +lazy_static = "1.4" +antlr-rust = "0.1.0" +``` + +#### 3. Generate your parser + +You use the ANTLR4 "tool" to generate a parser. These will reference the ANTLR +runtime, installed above. + +Suppose you're using a UNIX system and have set up an alias for the ANTLR4 tool +as described in [the getting started guide](https://github.com/antlr/antlr4/blob/master/doc/getting-started.md). +To generate your Rust parser, run the following command: + +```bash +antlr4 -Dlanguage=Rust MyGrammar.g4 +``` + +For a full list of antlr4 tool options, please visit the +[tool documentation page](https://github.com/antlr/antlr4/blob/master/doc/tool-options.md). + +### Next + +More information in the Rust target [README](https://github.com/rrevenantt/antlr4rust) +and in the antlr-rust crate [documentation](https://docs.rs/antlr-rust) + + diff --git a/doc/targets.md b/doc/targets.md index ad6e7dba94..f1f2521a68 100644 --- a/doc/targets.md +++ b/doc/targets.md @@ -10,7 +10,7 @@ This page lists the available and upcoming ANTLR runtimes. Please note that you * [C++](cpp-target.md) * [Swift](swift-target.md) * [PHP](php-target.md) -* [Dart](dart-target.md) +* [Rust](rust-target.md) (Unstable) ## Target feature parity diff --git a/grammars/Perf.g4 b/grammars/Perf.g4 new file mode 100644 index 0000000000..1f8333e2ca --- /dev/null +++ b/grammars/Perf.g4 @@ -0,0 +1,18 @@ +grammar Perf; + +stat : expr ';' + | expr '.' + ; + +expr + : ID + | 'not' expr + | expr 'and' expr + | expr 'or' expr + | '(' ID ')' expr + | expr '?' 
expr ':' expr + | 'between' expr 'and' expr + ; + +ID: [a-zA-Z_][a-zA-Z_0-9]*; +WS: [ \t\n\r\f]+ -> skip; \ No newline at end of file diff --git a/runtime-testsuite/pom.xml b/runtime-testsuite/pom.xml index 5b4e304440..b47cb833f8 100644 --- a/runtime-testsuite/pom.xml +++ b/runtime-testsuite/pom.xml @@ -98,6 +98,7 @@ **/csharp/Test*.java **/java/Test*.java + **/rust/Test*.java **/go/Test*.java **/javascript/Test*.java **/python2/Test*.java diff --git a/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Rust.test.stg~HEAD b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Rust.test.stg~HEAD new file mode 100644 index 0000000000..3d508a19e1 --- /dev/null +++ b/runtime-testsuite/resources/org/antlr/v4/test/runtime/templates/Rust.test.stg~HEAD @@ -0,0 +1,308 @@ +writeln(s) ::= < );>> +write(s) ::= < );>> +writeList(s) ::= << std::cout \<\< \<\< std::endl;>> + +False() ::= "false" +True() ::= "true" +Not(v) ::= "!" +Assert(s) ::= "" +//Cast(t,v) ::= "cast::\<_,>(&*)" +Cast(t,v) ::= <<(if let EContextAll::(ctx) = &* { ctx } else { panic!("can't cast")} )>> +Append(a,b) ::= ".to_string() + &().to_string()" +AppendStr(a,b) ::= <%.to_string() + &()%> +Concat(a,b) ::= "" + +DeclareLocal(s,v) ::= "let mut = " + +AssertIsList(v) ::= "" //not sure if possible in Rust +AssignLocal(s,v) ::= " = ;" + +InitIntVar(n,v) ::= "let = ;" +InitIntMember(n,v) ::= <<} +@parser::fields {: isize,} +@parser::init {: , +>> +InitBooleanMember(n,v) ::= <<} +@parser::fields {: bool,} +@parser::init {: , +>> + +IntArg(v) ::= ": isize" +VarRef(v) ::= "" + +GetMember(n) ::= "recog." +SetMember(n,v) ::= "recog. = ;" +AddMember(n,v) ::= "recog. += ;" +PlusMember(n,v) ::= "recog. + " +MemberEquals(n,v) ::= "recog. == " +ModMemberEquals(n,m,v) ::= "recog. % == " +ModMemberNotEquals(n,m,v) ::= "recog. 
% != " + +DumpDFA() ::= "recog.dump_dfa();" +Pass() ::= "/* do nothing */" + +StringList() ::= "Vec\" +BuildParseTrees() ::= "recog.build_parse_trees = true;" +BailErrorStrategy() ::= +< >().unwrap() = Box::new(antlr_rust::error_strategy::BailErrorStrategy::new()); +>> + +ToStringTree(s) ::= ".to_string_tree(&recog.base)" +Column() ::= "recog.get_char_position_in_line()" +Text() ::= "recog.get_text()" +ValEquals(a,b) ::= " == " +TextEquals(a) ::= "recog.get_text() == \"\"" +PlusText(a) ::="\"\".to_owned() + &recog.get_text()" +InputText() ::= "recog.base.input.get_all_text()" +LTEquals(i, v) ::= "recog.input.lt().unwrap().get_text() == " +LANotEquals(i, v) ::= "recog.input.la() != " +TokenStartColumnEquals(i) ::= "recog.token_start_column == " + +ImportListener(X) ::= "" + +GetExpectedTokenNames() ::= "recog.base.get_expected_tokens().to_token_string(recog.get_vocabulary())" + +RuleInvocationStack() ::= "format!(\"[{}]\",recog.get_rule_invocation_stack().join(\", \"))" + +LL_EXACT_AMBIG_DETECTION() ::= <> + +//ParserToken(parser, token) ::= <%.%> +ParserToken(parser, token) ::= <%self::%> + +Production(p) ::= <%

%> + +Result(r) ::= <%%> + +ParserPropertyMember() ::= << +@members { +fn Property(&self) -> bool { + return true; +} +} +>> + +ParserPropertyCall(p, call) ::= "recog." + +// unsupported +TreeNodeWithAltNumField(X) ::= << +@parser::definitions { +struct MyRuleNode\(BaseParserRuleContext\ >); +struct MyRuleNodeCtx\(T,std::cell::Cell\); +use core::fmt::{Formatter,Debug}; +impl\ Debug for MyRuleNode\{ + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + f.write_str(std::any::type_name::\()) + } +} + +impl\ CustomRuleContext for MyRuleNodeCtx\{ + fn get_alt_number(&self) -> isize {self.1.get()} + fn set_alt_number(&self, _alt_number: isize) {self.1.set(_alt_number)} +} + + +impl\ Deref for MyRuleNode\{ + type Target = dyn ParserRuleContext; + fn deref(&self) -> &Self::Target {&self.0} +} +impl\ DerefMut for MyRuleNode\{ + fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } +} +impl\ antlr_rust::parser_rule_context::DerefSeal for MyRuleNode\{} +} +>> + +PositionAdjustingLexer() ::= << + fn handleAcceptPositionForIdentifier(this: &mut BaseLexer\) { + let tokenText = this.get_text(); + let identifierLength = tokenText.chars().position(|it| !Self::isIdentifierChar(it)).unwrap() as isize; +// while (identifierLength \< tokenText.len() as isize && Self::isIdentifierChar(tokenText[identifierLength as usize])) { +// identifierLength+=1; +// } + + if (this.get_input_stream().index() > this.token_start_char_index + identifierLength) { + let offset = identifierLength - 1; + let mut input = this.input.take().unwrap(); + this.interpreter.as_mut().unwrap().resetAcceptPosition(&mut input, + this.token_start_char_index + offset, this.token_start_line, this.token_start_column + offset); + this.input = Some(input); + //return; + }; + + } + + fn handleAcceptPositionForKeyword(this: &mut BaseLexer\, keyword: &str) { + let mut input = this.input.take().unwrap(); + if (input.index() > this.token_start_char_index + keyword.len() as isize) { + let offset = keyword.len() as 
isize - 1; + this.interpreter.as_mut().unwrap().resetAcceptPosition(&mut input, + this.token_start_char_index + offset, this.token_start_line, this.token_start_column + offset); + //return true; + } + this.input = Some(input); + + //return false; + } + + fn isIdentifierChar(c: char) -> bool{ + return c.is_ascii_alphanumeric() || c == '_'; + } +>> + +PositionAdjustingLexerDef() ::= << +//@definitions +trait PositionAdjustingLexerATNSim { + fn resetAcceptPosition(&mut self, input: &mut dyn CharStream, index: isize, line: isize, charPositionInLine: isize); + } +impl PositionAdjustingLexerATNSim for antlr_rust::lexer_atn_simulator::LexerATNSimulator{ + fn resetAcceptPosition(&mut self, input: &mut dyn CharStream, index: isize, line: isize, charPositionInLine: isize) { + input.seek(index); + self.set_line(line); + self.set_char_position_in_line(charPositionInLine); + self.consume(input); + } +} + +} +@extend{ + + fn before_emit(lexer:&mut BaseLexer\) { + match (lexer.token_type) { + TOKENS => Self::handleAcceptPositionForKeyword(lexer,"tokens"), + LABEL => Self::handleAcceptPositionForIdentifier(lexer), + _ => {} + } + } + +>> + +BasicListener(X) ::= << +@parser::definitions { +use antlr_rust::tree::ParseTreeListener; +struct TestListener; + +impl ParseTreeListener for TestListener{ + fn visit_terminal(&mut self, node: &TerminalNode) { + println!("{}",node.symbol.get_text()); + } +} +impl TListener for TestListener{} +} +>> + +WalkListener(s) ::= << +let walker = antlr_rust::tree::ParseTreeWalker; +let mut listener:Box\ = Box::new(TestListener); +walker.walk(&mut listener, .deref()); +>> + +TokenGetterListener(X) ::= << +@parser::definitions { +use antlr_rust::tree::ParseTreeListener; +use crate::antlr_rust::tree::Tree; +struct TestListener; + +impl TListener for TestListener{ + fn exit_a(&mut self, node: &AContext) { + if node.get_children().len() == 2 { + println!("{} {} 
{:?}",node.INT(0).unwrap().symbol.get_text(),node.INT(1).unwrap().symbol.get_text(),node.INT_all()) + } else{ + println!("{}",node.ID().unwrap().symbol); + } + } +} +impl ParseTreeListener for TestListener{} +} +>> + +RuleGetterListener(X) ::= << +@parser::definitions { + +use antlr_rust::tree::ParseTreeListener; +use crate::antlr_rust::tree::Tree; +struct TestListener; + +impl TListener for TestListener{ + fn exit_a(&mut self, node: &AContext) { + if node.get_children().len() == 2 { + println!("{} {} {}", + node.b(0).get_start().get_text(), + node.b(1).get_start().get_text(), + node.b_all()[0].get_start().get_text(), + ) + } else{ + println!("{}",node.b(0).get_start().get_text()); + } + } +} +impl ParseTreeListener for TestListener{} +} +>> + + +LRListener(X) ::= << +@parser::definitions { +use antlr_rust::tree::ParseTreeListener; +use antlr_rust::tree::Tree; +struct TestListener; + +impl ParseTreeListener for TestListener{} +impl TListener for TestListener{ + fn exit_e(&mut self, ctx: &EContext) { + if ctx.get_children().len() == 3 { + println!("{} {} {}",ctx.e(0).get_start().get_text(),ctx.e(1).get_start().get_text(),ctx.e_all()[0].get_start().get_text()); + } else { + println!("{}",ctx.INT().unwrap().symbol.get_text()); + } + } +} +} +>> + +LRWithLabelsListener(X) ::= << +@parser::definitions { + +use antlr_rust::tree::ParseTreeListener; +use antlr_rust::tree::Tree; +struct TestListener; + +impl ParseTreeListener for TestListener{} +impl TListener for TestListener{ + fn exit_Call(&mut self, ctx: &CallContext) { + println!("{} {}",ctx.e().get_start().get_text(),ctx.eList().to_string(None,None)); + } + fn exit_Int(&mut self, ctx: &IntContext){ + println!("{}",ctx.INT().unwrap().symbol.get_text()); + } +} +} +>> + +DeclareContextListGettersFunction() ::= << +fn foo() { + let s:SContext = unimplemented!(); + let a:Vec\ > = s.a_all(); + let b:Vec\ > = s.b_all(); +} +>> + +Declare_foo() ::= <> + +Invoke_foo() ::= "recog.foo();" + +Declare_pred() ::= << +pub fn 
pred(&self,v:bool)-> bool { + println!("eval={}",v); + return v; +} +>> + +Invoke_pred(v) ::= <)>> + +ContextRuleFunction(ctx, rule) ::= "..as_ref().unwrap()" +StringType() ::= "String" +ContextMember(ctx, subctx, member) ::= "..get_()" diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java index 6134b265b9..5d2f398e82 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/CompositeParsersDescriptors.java @@ -227,7 +227,7 @@ public static class DelegatorInvokesDelegateRuleWithArgs extends BaseCompositePa /** parser grammar S; - a[int x] returns [int y] : B {} {$y=1000;} ; + a[] returns [] : B {} {$y=1000;} ; */ @CommentHasStringValue public String slaveGrammarS; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java index fb71b38632..3453c1ae01 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/FullContextParsingDescriptors.java @@ -171,7 +171,7 @@ public static abstract class ExprAmbiguity extends BaseDiagnosticParserTestDescr s @init {} : expr[0] {}; - expr[int _p] + expr[] : ID ( {5 >= $_p}? 
'*' expr[6] diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java index 01e868d6e1..43f0f66ec9 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/LeftRecursionDescriptors.java @@ -524,7 +524,7 @@ public static abstract class MultipleAlternativesWithCommonLabel extends BasePar /** grammar T; s : e {}; - e returns [int v] + e returns [] : e '*' e {$v = (0)}, {})> * (1)}, {})>;} # binary | e '+' e {$v = (0)}, {})> + (1)}, {})>;} # binary | INT {$v = $INT.int;} # anInt @@ -672,11 +672,11 @@ public static abstract class ReturnValueAndActionsAndLabels extends BaseParserTe /** grammar T; s : q=e {}; - e returns [int v] + e returns [] : a=e op='*' b=e {$v = $a.v * $b.v;} # mult | a=e '+' b=e {$v = $a.v + $b.v;} # add | INT {$v = $INT.int;} # anInt - | '(' x=e ')' {$v = $x.v;} # parens + | '(' x=e ')' {$v = 0 + $x.v;} # parens | x=e '++' {$v = $x.v+1;} # inc | e '--' # dec | ID {$v = 3;} # anID @@ -822,11 +822,11 @@ public static abstract class ReturnValueAndActions extends BaseParserTestDescrip /** grammar T; s : e {}; - e returns [int v, ignored] + e returns [, ignored] : a=e '*' b=e {$v = $a.v * $b.v;} | a=e '+' b=e {$v = $a.v + $b.v;} | INT {$v = $INT.int;} - | '(' x=e ')' {$v = $x.v;} + | '(' x=e ')' {$v = 0 + $x.v;} ; INT : '0'..'9'+ ; WS : (' '|'\n') -> skip ; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java index 5bd26693c7..152effcd84 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParseTreesDescriptors.java @@ -44,6 +44,10 @@ public static class 
AltNum extends BaseParserTestDescriptor { @CommentHasStringValue public String grammar; + @Override + public boolean ignore(String targetName) { + return targetName.equals("Rust"); + } } public static class ExtraToken extends BaseParserTestDescriptor { @@ -112,7 +116,7 @@ public static class ExtraTokensAndAltLabels extends BaseParserTestDescriptor { @Override public boolean ignore(String targetName) { - return !targetName.matches("Java|Python2|Python3|Node|Swift|CSharp|Dart"); + return !targetName.matches("Java|Python2|Python3|Node|Swift|CSharp|Rust|Dart"); } } diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java index b6f1c46860..57a65da4a1 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserErrorsDescriptors.java @@ -639,7 +639,7 @@ public static class ExtraneousInput extends BaseParserTestDescriptor { @Override public boolean ignore(String targetName) { - return !"Java".equals(targetName) && !"Swift".equals(targetName) && !"Dart".equals(targetName); + return !"Java".equals(targetName) && !"Swift".equals(targetName) && !"Rust".equals(targetName) && !"Dart".equals(targetName); } } } diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java index d2b0931f09..b1de6df4ab 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/ParserExecDescriptors.java @@ -693,7 +693,7 @@ public static class PredicatedIfIfElse extends BaseParserTestDescriptor { grammar T; s : stmt EOF ; stmt : ifStmt | ID; - ifStmt : 'if' ID stmt ('else' stmt | { })> }?); + ifStmt : 'if' ID stmt 
('else' stmt | { })> }?); ELSE : 'else'; ID : [a-zA-Z]+; WS : [ \\n\\t]+ -> skip; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java index 69dbd9a07d..495117832d 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/PerformanceDescriptors.java @@ -113,7 +113,7 @@ public static abstract class DropLoopEntryBranchInLRRule extends BaseParserTestD @Override public boolean ignore(String targetName) { - return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift", "Dart").contains(targetName); + return !Arrays.asList("Java", "CSharp", "Python2", "Python3", "Node", "Cpp", "Swift", "Rust", "Dart").contains(targetName); } } @@ -199,7 +199,7 @@ public static class DropLoopEntryBranchInLRRule_4 extends DropLoopEntryBranchInL @Override public boolean ignore(String targetName) { // passes, but still too slow in Python and JavaScript - return !Arrays.asList("Java", "CSharp", "Cpp", "Swift", "Dart").contains(targetName); + return !Arrays.asList("Java", "CSharp", "Cpp", "Swift", "Rust", "Dart").contains(targetName); } } diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java index b09f074881..3993c94106 100644 --- a/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/descriptors/SemPredEvalParserDescriptors.java @@ -90,7 +90,7 @@ public static class AtomWithClosureInTranslatedLRRule extends BaseParserTestDesc /** grammar T; start : e[0] EOF; - e[int _p] + e[] : ( 'a' | 'b'+ ) ( {3 >= $_p}? 
'+' e[4] )* ; @@ -100,14 +100,15 @@ public static class AtomWithClosureInTranslatedLRRule extends BaseParserTestDesc } - /** We cannot collect predicates that are dependent on local context if - * we are doing a global follow. They appear as if they were not there at all. + /** + * We cannot collect predicates that are dependent on local context if + * we are doing a global follow. They appear as if they were not there at all. */ - public static class DepedentPredsInGlobalFOLLOW extends BaseParserTestDescriptor { + public static class DependentPredsInGlobalFOLLOW extends BaseParserTestDescriptor { public String input = "a!"; /** - eval=true - parse + eval=true + parse */ @CommentHasStringValue public String output; @@ -122,8 +123,8 @@ public static class DepedentPredsInGlobalFOLLOW extends BaseParserTestDescriptor } s : a[99] ; - a[int i] : e {}? {} '!' ; - b[int i] : e {}? ID ; + a[] : e {}? {} '!' ; + b[] : e {}? ID ; e : ID | ; // non-LL(1) so we use ATN ID : 'a'..'z'+ ; INT : '0'..'9'+; @@ -269,7 +270,7 @@ public static abstract class PredFromAltTestedInLoopBack extends BaseParserTestD @after {} : para para EOF ; para: paraContent NL NL ; - paraContent : ('s'|'x'|{})>}? NL)+ ; + paraContent : ('s'|'x'|{})>}? NL)+ ; NL : '\n' ; s : 's' ; X : 'x' ; @@ -291,7 +292,7 @@ public static class PredFromAltTestedInLoopBack_1 extends PredFromAltTestedInLoo @Override public boolean ignore(String targetName) { - return !"Java".equals(targetName) && !"Swift".equals(targetName); + return !"Java".equals(targetName) && !"Swift".equals(targetName) && !"Rust".equals(targetName); } } @@ -355,7 +356,7 @@ public static class PredicateDependentOnArg extends BaseParserTestDescriptor { grammar T; @parser::members {} s : a[2] a[1]; - a[int i] + a[] : {}? ID {} | {}? ID {} ; @@ -389,7 +390,7 @@ public static class PredicateDependentOnArg2 extends BaseParserTestDescriptor { grammar T; @parser::members {} s : a[2] a[1]; - a[int i] + a[] : {}? ID | {}? 
ID ; diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/BaseRustTest.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/BaseRustTest.java new file mode 120000 index 0000000000..b65b090406 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/BaseRustTest.java @@ -0,0 +1 @@ +../../../../../../../../runtime/Rust/templates/BaseRustTest.java \ No newline at end of file diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeLexers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeLexers.java new file mode 100644 index 0000000000..b4afcae6d1 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeLexers.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeLexersDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeLexers extends BaseRuntimeTest { + public TestCompositeLexers(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeLexersDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeParsers.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeParsers.java new file mode 100644 index 0000000000..bdf55028f4 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestCompositeParsers.java 
@@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.CompositeParsersDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestCompositeParsers extends BaseRuntimeTest { + public TestCompositeParsers(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(CompositeParsersDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestFullContextParsing.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestFullContextParsing.java new file mode 100644 index 0000000000..a6ff193914 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestFullContextParsing.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.FullContextParsingDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestFullContextParsing extends BaseRuntimeTest { + public TestFullContextParsing(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(FullContextParsingDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLeftRecursion.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLeftRecursion.java new file mode 100644 index 0000000000..230d10d45a --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLeftRecursion.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LeftRecursionDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLeftRecursion extends BaseRuntimeTest { + public TestLeftRecursion(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LeftRecursionDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerErrors.java new file mode 100644 index 0000000000..3d46210464 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerErrors.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerErrorsDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerErrors extends BaseRuntimeTest { + public TestLexerErrors(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerErrorsDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerExec.java new file mode 100644 index 0000000000..6ab4ab1acd --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestLexerExec.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.LexerExecDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestLexerExec extends BaseRuntimeTest { + public TestLexerExec(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(LexerExecDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestListeners.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestListeners.java new file mode 100644 index 0000000000..1318d7799c --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestListeners.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ListenersDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestListeners extends BaseRuntimeTest { + public TestListeners(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ListenersDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParseTrees.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParseTrees.java new file mode 100644 index 0000000000..b36cef9186 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParseTrees.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParseTreesDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParseTrees extends BaseRuntimeTest { + public TestParseTrees(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParseTreesDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserErrors.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserErrors.java new file mode 100644 index 0000000000..5ab7d8d7c5 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserErrors.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserErrorsDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserErrors extends BaseRuntimeTest { + public TestParserErrors(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserErrorsDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserExec.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserExec.java new file mode 100644 index 0000000000..7adf9ecb4e --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestParserExec.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.ParserExecDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestParserExec extends BaseRuntimeTest { + public TestParserExec(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(ParserExecDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestPerformance.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestPerformance.java new file mode 100644 index 0000000000..475910665d --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestPerformance.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.PerformanceDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestPerformance extends BaseRuntimeTest { + public TestPerformance(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + ((BaseRustTest) this.delegate).cargo_options = "--release"; + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(PerformanceDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalLexer.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalLexer.java new file mode 100644 index 0000000000..8b11dc1edd --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalLexer.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalLexerDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalLexer extends BaseRuntimeTest { + public TestSemPredEvalLexer(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalLexerDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalParser.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalParser.java new file mode 100644 index 0000000000..16619de1f5 --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSemPredEvalParser.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SemPredEvalParserDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSemPredEvalParser extends BaseRuntimeTest { + public TestSemPredEvalParser(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SemPredEvalParserDescriptors.class, "Rust"); + } +} diff --git a/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSets.java b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSets.java new file mode 100644 index 0000000000..774c3ad12f --- /dev/null +++ b/runtime-testsuite/test/org/antlr/v4/test/runtime/rust/TestSets.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +package org.antlr.v4.test.runtime.rust; + +import org.antlr.v4.test.runtime.BaseRuntimeTest; +import org.antlr.v4.test.runtime.RuntimeTestDescriptor; +import org.antlr.v4.test.runtime.descriptors.SetsDescriptors; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSets extends BaseRuntimeTest { + public TestSets(RuntimeTestDescriptor descriptor) { + super(descriptor, new BaseRustTest()); + } + + @Parameterized.Parameters(name = "{0}") + public static RuntimeTestDescriptor[] getAllTestDescriptors() { + return BaseRuntimeTest.getRuntimeTestDescriptors(SetsDescriptors.class, "Rust"); + } +} diff --git a/runtime/Rust b/runtime/Rust new file mode 160000 index 0000000000..829c1d11f7 --- /dev/null +++ b/runtime/Rust @@ -0,0 +1 @@ +Subproject commit 829c1d11f70ccef61bb9d47d8e1addc34ed17b02 diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000000..a99adee415 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +edition = "2018" +fn_single_line = true \ No newline at end of file diff --git a/src/atn_config_set.rs b/src/atn_config_set.rs new file mode 100644 index 0000000000..ddbb5072fe --- /dev/null +++ b/src/atn_config_set.rs @@ -0,0 +1,229 @@ +use std::cmp::max; +use std::collections::HashMap; +use std::fmt::{Debug, Error, Formatter}; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::ops::Deref; + +use bit_set::BitSet; +use murmur3::murmur3_32::MurmurHasher; + +use crate::atn_config::ATNConfig; +use crate::atn_simulator::IATNSimulator; +use crate::atn_state::ATNStateRef; +use crate::parser_atn_simulator::MergeCache; +use crate::prediction_context::{MurmurHasherBuilder, PredictionContext}; +use crate::semantic_context::SemanticContext; + +pub struct ATNConfigSet { + cached_hash: u64, + + //todo looks like we need only iteration for configs + // so i think we can replace configs and lookup with indexhashset + config_lookup: HashMap, + + //todo remove box? 
+ pub(crate) configs: Vec>, + + pub(crate) conflicting_alts: BitSet, + + dips_into_outer_context: bool, + + full_ctx: bool, + + has_semantic_context: bool, + + read_only: bool, + + unique_alt: isize, + + /// creates key for lookup + /// Key::Full - for Lexer + /// Key::Partial - for Parser + hasher: fn(&ATNConfig) -> Key, +} + +#[derive(Eq, PartialEq)] +enum Key { + Full(ATNConfig), + Partial(i32, ATNStateRef, isize, SemanticContext), +} + +impl Hash for Key { + fn hash(&self, state: &mut H) { + match self { + Key::Full(x) => x.hash(state), + Key::Partial(hash, _, _, _) => state.write_i32(*hash), + } + } +} + +impl Debug for ATNConfigSet { + fn fmt(&self, _f: &mut Formatter<'_>) -> Result<(), Error> { + _f.write_str("ATNConfigSet")?; + _f.debug_list().entries(self.configs.iter()).finish()?; + if self.has_semantic_context { + _f.write_str(",hasSemanticContext=true")? + } + if self.conflicting_alts.is_empty() { + _f.write_fmt(format_args!(",uniqueAlt={}", self.unique_alt)) + } else { + _f.write_fmt(format_args!(",conflictingAlts={:?}", self.conflicting_alts)) + } + } +} + +impl PartialEq for ATNConfigSet { + fn eq(&self, other: &Self) -> bool { + self.configs == other.configs + && self.full_ctx == other.full_ctx + && self.unique_alt == other.unique_alt + && self.conflicting_alts == other.conflicting_alts + && self.has_semantic_context == other.has_semantic_context + && self.dips_into_outer_context == other.dips_into_outer_context + } +} + +impl Eq for ATNConfigSet {} + +impl Hash for ATNConfigSet { + fn hash(&self, state: &mut H) { self.configs.hash(state) } +} + +impl ATNConfigSet { + pub fn new_base_atnconfig_set(full_ctx: bool) -> ATNConfigSet { + ATNConfigSet { + cached_hash: 0, + config_lookup: HashMap::with_hasher(MurmurHasherBuilder {}), + configs: vec![], + conflicting_alts: Default::default(), + dips_into_outer_context: false, + full_ctx, + has_semantic_context: false, + read_only: false, + unique_alt: 0, + hasher: Self::local_hash_key, + } + } + + // for 
lexerATNConfig + pub fn new_ordered() -> ATNConfigSet { + let mut a = ATNConfigSet::new_base_atnconfig_set(true); + + a.hasher = Self::full_hash_key; + a + } + + fn full_hash_key(config: &ATNConfig) -> Key { Key::Full(config.clone()) } + + fn local_hash_key(config: &ATNConfig) -> Key { + let mut hasher = MurmurHasher::default(); + config.get_state().hash(&mut hasher); + config.get_alt().hash(&mut hasher); + config.semantic_context.hash(&mut hasher); + + Key::Partial( + hasher.finish() as i32, + config.get_state(), + config.get_alt(), + config.semantic_context.deref().clone(), + ) + } + + pub fn add_cached( + &mut self, + config: Box, + mut merge_cache: Option<&mut MergeCache>, + ) -> bool { + assert!(!self.read_only); + + if *config.semantic_context != SemanticContext::NONE { + self.has_semantic_context = true + } + + if config.get_reaches_into_outer_context() > 0 { + self.dips_into_outer_context = true + } + + let hasher = self.hasher; + let key = hasher(config.as_ref()); + + if let Some(existing) = self.config_lookup.get(&key) { + let existing = self.configs.get_mut(*existing).unwrap().as_mut(); + let root_is_wildcard = !self.full_ctx; + + let merged = PredictionContext::merge( + existing.get_context().unwrap(), + config.get_context().unwrap(), + root_is_wildcard, + &mut merge_cache, + ); + + existing.set_reaches_into_outer_context(max( + existing.get_reaches_into_outer_context(), + config.get_reaches_into_outer_context(), + )); + + if config.is_precedence_filter_suppressed() { + existing.set_precedence_filter_suppressed(true) + } + + existing.set_context(merged); + } else { + self.config_lookup.insert(key, self.configs.len()); + self.cached_hash = 0; + self.configs.push(config); + } + true + } + + pub fn add(&mut self, config: Box) -> bool { self.add_cached(config, None) } + + pub fn get_items(&self) -> impl Iterator { + self.configs.iter().map(|c| c.as_ref()) + } + + pub fn optimize_configs(&mut self, _interpreter: &dyn IATNSimulator) { + if 
self.configs.is_empty() { + return; + } + + for config in self.configs.iter_mut() { + let mut visited = HashMap::new(); + config.set_context( + _interpreter + .shared_context_cache() + .get_shared_context(config.get_context().unwrap(), &mut visited), + ); + } + } + + pub fn length(&self) -> usize { self.configs.len() } + + pub fn is_empty(&self) -> bool { self.configs.is_empty() } + + pub fn has_semantic_context(&self) -> bool { self.has_semantic_context } + + pub fn set_has_semantic_context(&mut self, _v: bool) { self.has_semantic_context = _v; } + + pub fn read_only(&self) -> bool { self.read_only } + + pub fn set_read_only(&mut self, _read_only: bool) { self.read_only = _read_only; } + + pub fn full_context(&self) -> bool { self.full_ctx } + + //duplicate of the self.conflicting_alts??? + pub fn get_alts(&self) -> BitSet { + self.configs.iter().fold(BitSet::new(), |mut acc, c| { + acc.insert(c.get_alt() as usize); + acc + }) + } + + pub fn get_unique_alt(&self) -> isize { self.unique_alt } + + pub fn set_unique_alt(&mut self, _v: isize) { self.unique_alt = _v } + + pub fn get_dips_into_outer_context(&self) -> bool { self.dips_into_outer_context } + + pub fn set_dips_into_outer_context(&mut self, _v: bool) { self.dips_into_outer_context = _v } +} diff --git a/src/atn_state.rs b/src/atn_state.rs new file mode 100644 index 0000000000..79d83d6fed --- /dev/null +++ b/src/atn_state.rs @@ -0,0 +1,422 @@ +use std::fmt::Debug; + +use once_cell::sync::OnceCell; + +use crate::interval_set::IntervalSet; +use crate::transition::Transition; + +pub(crate) const ATNSTATE_INVALID_TYPE: isize = 0; +pub(crate) const ATNSTATE_BASIC: isize = 1; +pub(crate) const ATNSTATE_RULE_START: isize = 2; +pub(crate) const ATNSTATE_BLOCK_START: isize = 3; +pub(crate) const ATNSTATE_PLUS_BLOCK_START: isize = 4; +pub(crate) const ATNSTATE_STAR_BLOCK_START: isize = 5; +pub(crate) const ATNSTATE_TOKEN_START: isize = 6; +pub(crate) const ATNSTATE_RULE_STOP: isize = 7; +pub(crate) const 
ATNSTATE_BLOCK_END: isize = 8; +pub(crate) const ATNSTATE_STAR_LOOP_BACK: isize = 9; +pub(crate) const ATNSTATE_STAR_LOOP_ENTRY: isize = 10; +pub(crate) const ATNSTATE_PLUS_LOOP_BACK: isize = 11; +pub(crate) const ATNSTATE_LOOP_END: isize = 12; +pub(crate) const ATNSTATE_INVALID_STATE_NUMBER: isize = -1; + +//might be changed later +#[derive(Debug, Eq, PartialEq)] +pub enum ATNStateType { + RuleStartState { + stop_state: ATNStateRef, + is_left_recursive: bool, + }, + RuleStopState, + BlockEndState(ATNStateRef), + LoopEndState(ATNStateRef), + StarLoopbackState, + BasicState, + DecisionState { + decision: isize, + nongreedy: bool, + state: ATNDecisionState, + }, + InvalidState, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum ATNDecisionState { + StarLoopEntry { + loop_back_state: ATNStateRef, + is_precedence: bool, + }, + TokenStartState, + PlusLoopBack, + BlockStartState { + end_state: ATNStateRef, + en: ATNBlockStart, + }, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum ATNBlockStart { + BasicBlockStart, + StarBlockStart, + PlusBlockStart(ATNStateRef), +} + +pub type ATNStateRef = usize; + +// todo no need for trait here, it is too slow for hot code +pub trait ATNState: Sync + Send + Debug { + fn has_epsilon_only_transitions(&self) -> bool; + + fn get_rule_index(&self) -> usize; + fn set_rule_index(&self, v: usize); + + fn get_next_tokens_within_rule(&self) -> &OnceCell; + // fn set_next_token_within_rule(&mut self, v: IntervalSet); + + fn get_state_type(&self) -> &ATNStateType; + fn get_state_type_mut(&mut self) -> &mut ATNStateType; + + fn get_state_type_id(&self) -> isize; + + fn get_state_number(&self) -> usize; + fn set_state_number(&self, state_number: isize); + + fn get_transitions(&self) -> &Vec>; + fn set_transitions(&self, t: Vec>); + fn add_transition(&mut self, trans: Box); +} + +#[derive(Debug)] +pub struct BaseATNState { + next_tokens_within_rule: OnceCell, + + // atn: Box, + epsilon_only_transitions: bool, + + pub rule_index: usize, + + pub 
state_number: usize, + + pub state_type_id: isize, + + pub state_type: ATNStateType, + + transitions: Vec>, +} + +impl BaseATNState { + pub fn new_base_atnstate() -> BaseATNState { + BaseATNState { + next_tokens_within_rule: OnceCell::new(), + epsilon_only_transitions: false, + rule_index: 0, + state_number: 0, + state_type_id: 0, + state_type: ATNStateType::InvalidState, + transitions: Vec::new(), + } + } + + fn is_non_greedy_exit_state(&self) -> bool { unimplemented!() } +} + +impl ATNState for BaseATNState { + fn has_epsilon_only_transitions(&self) -> bool { self.epsilon_only_transitions } + fn get_rule_index(&self) -> usize { self.rule_index } + + fn set_rule_index(&self, _v: usize) { unimplemented!() } + + fn get_next_tokens_within_rule(&self) -> &OnceCell { + &self.next_tokens_within_rule + } + + fn get_state_type(&self) -> &ATNStateType { &self.state_type } + + fn get_state_type_mut(&mut self) -> &mut ATNStateType { &mut self.state_type } + + fn get_state_type_id(&self) -> isize { self.state_type_id } + + fn get_state_number(&self) -> usize { self.state_number } + + fn set_state_number(&self, _state_number: isize) { unimplemented!() } + + fn get_transitions(&self) -> &Vec> { &self.transitions } + + fn set_transitions(&self, _t: Vec>) { unimplemented!() } + + fn add_transition(&mut self, trans: Box) { + if self.transitions.is_empty() { + self.epsilon_only_transitions = trans.is_epsilon() + } else { + self.epsilon_only_transitions &= trans.is_epsilon() + } + + let mut already_present = false; + for existing in self.transitions.iter() { + if existing.get_target() == trans.get_target() { + if existing.get_label().is_some() + && trans.get_label().is_some() + && existing.get_label() == trans.get_label() + { + already_present = true; + break; + } else if existing.is_epsilon() && trans.is_epsilon() { + already_present = true; + break; + } + } + } + if !already_present { + self.transitions.push(trans); + } + } +} +//pub struct BasicState { +// base: BaseATNState, 
+//} +// +//fn new_basic_state() -> BasicState { unimplemented!() } +// +//pub trait DecisionState:ATNState { +// +// fn get_decision(&self) -> isize; +// fn set_decision(&self, b: isize); +// +// fn get_non_greedy(&self) -> bool; +// fn set_non_greedy(&self, b: bool); +//} +// +//pub struct BaseDecisionState { +// base: BaseATNState, +// decision: isize, +// non_greedy: bool, +//} + +// +//fn new_base_decision_state() -> BaseDecisionState { unimplemented!() } +//impl DecisionState for BaseDecisionState { +// fn get_decision(&self) -> isize { unimplemented!() } +// +// fn set_decision(&self, b: isize) { unimplemented!() } +// +// fn get_non_greedy(&self) -> bool { unimplemented!() } +// +// fn set_non_greedy(&self, b: bool) { unimplemented!() } +//} +// +//impl ATNState for BaseDecisionState{ +// fn get_epsilon_only_transitions(&self) -> bool { +// self.base.get_epsilon_only_transitions() +// } +// +// fn get_rule_index(&self) -> isize { +// self.base.get_rule_index() +// } +// +// fn set_rule_index(&self, v: isize) { +// self.base.set_rule_index(v) +// } +// +// fn get_next_token_within_rule(&self) -> IntervalSet { +// self.base.get_next_token_within_rule() +// } +// +// fn set_next_token_within_rule(&self, v: IntervalSet) { +// self.base.set_next_token_within_rule(v) +// } +// +// fn get_atn(&self) -> Arc { +// self.base.get_atn() +// } +// +// fn set_atn(&self, atn: Box) { +// self.base.set_atn(atn) +// } +// +// fn get_state_type(&self) -> &ATNStateType { +// self.base.get_state_type() +// } +// +// fn get_state_number(&self) -> isize { +// self.base.get_state_number() +// } +// +// fn set_state_number(&self, stateNumber: isize) { +// self.base.set_state_number(stateNumber) +// } +// +// fn get_transitions(&self) -> Vec<&Transition> { +// self.base.get_transitions() +// } +// +// fn set_transitions(&self, t: Vec>) { +// self.base.set_transitions(t) +// } +// +// fn add_transition(&self, trans: Box, index: isize) { +// self.base.add_transition(trans, index) +// 
} +//} +//pub trait BlockStartState :DecisionState{ +// +// fn get_end_state(&self) -> &BlockEndState; +// fn set_end_state(&self, b: Box); +//} +// +//pub struct BaseBlockStartState { +// base: BaseDecisionState, +// end_state: Box, +//} +// +//fn new_block_start_state() -> BaseBlockStartState { unimplemented!() } +// +//impl BlockStartState for BaseBlockStartState { +// fn get_end_state(&self) -> &BlockEndState { unimplemented!() } +// +// fn set_end_state(&self, b: Box) { unimplemented!() } +//} +// +//impl DecisionState for BaseBlockStartState{ +// fn get_decision(&self) -> isize { +// self.base.get_decision() +// } +// +// fn set_decision(&self, b: isize) { +// self.base.set_decision(b) +// } +// +// fn get_non_greedy(&self) -> bool { +// self.base.get_non_greedy() +// } +// +// fn set_non_greedy(&self, b: bool) { +// self.base.set_non_greedy(b) +// } +//} +// +//impl ATNState for BaseBlockStartState{ +// fn get_epsilon_only_transitions(&self) -> bool { +// self.base.get_epsilon_only_transitions() +// } +// +// fn get_rule_index(&self) -> isize { +// self.base.get_rule_index() +// } +// +// fn set_rule_index(&self, v: isize) { +// self.base.set_rule_index(v) +// } +// +// fn get_next_token_within_rule(&self) -> IntervalSet { +// self.base.get_next_token_within_rule() +// } +// +// fn set_next_token_within_rule(&self, v: IntervalSet) { +// self.base.set_next_token_within_rule(v) +// } +// +// fn get_atn(&self) -> Arc { +// self.base.get_atn() +// } +// +// fn set_atn(&self, atn: Box) { +// self.base.set_atn(atn) +// } +// +// fn get_state_type(&self) -> &ATNStateType { +// self.base.get_state_type() +// } +// +// fn get_state_number(&self) -> isize { +// self.base.get_state_number() +// } +// +// fn set_state_number(&self, stateNumber: isize) { +// self.base.set_state_number(stateNumber) +// } +// +// fn get_transitions(&self) -> Vec<&Transition> { +// self.base.get_transitions() +// } +// +// fn set_transitions(&self, t: Vec>) { +// 
self.base.set_transitions(t) +// } +// +// fn add_transition(&self, trans: Box, index: isize) { +// self.base.add_transition(trans, index) +// } +//} +// +//pub struct BasicBlockStartState { +// base: BaseBlockStartState, +//} +// +//fn new_basic_block_start_state() -> BasicBlockStartState { unimplemented!() } +// +//pub struct BlockEndState { +// base: BaseATNState, +// start_state: Box, +//} +// +//fn new_block_end_state() -> BlockEndState { unimplemented!() } +// +//pub struct RuleStopState { +// base: BaseATNState, +//} +// +//fn new_rule_stop_state() -> RuleStopState { unimplemented!() } +// +//pub struct RuleStartState { +// base: BaseATNState, +// stop_state: Box, +// is_precedence_rule: bool, +//} +// +//fn new_rule_start_state() -> RuleStartState { unimplemented!() } +// +//pub struct PlusLoopbackState { +// base: BaseDecisionState, +//} +// +//fn new_plus_loopback_state() -> PlusLoopbackState { unimplemented!() } +// +//pub struct PlusBlockStartState { +// base: BaseBlockStartState, +// loop_back_state: Box, +//} +// +//fn new_plus_block_start_state() -> PlusBlockStartState { unimplemented!() } +// +//pub struct StarBlockStartState { +// base: BaseBlockStartState, +//} +// +//fn new_star_block_start_state() -> StarBlockStartState { unimplemented!() } +// +//pub struct StarLoopbackState { +// base: BaseATNState, +//} +// +//fn new_star_loopback_state() -> StarLoopbackState { unimplemented!() } +// +//pub struct StarLoopEntryState { +// base: BaseDecisionState, +// loop_back_state: Box, +// precedence_rule_decision: bool, +//} +// +//fn new_star_loop_entry_state() -> StarLoopEntryState { unimplemented!() } +// +//pub struct LoopEndState { +// base: BaseATNState, +// loop_back_state: Box, +//} +// +//fn new_loop_end_state() -> LoopEndState { unimplemented!() } +// +//pub struct TokensStartState { +// base: BaseDecisionState, +//} +// +//fn new_tokens_start_state() -> TokensStartState { unimplemented!() } diff --git a/src/lib.rs b/src/lib.rs new file mode 
100644 index 0000000000..d5d43300b8 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,167 @@ +#![crate_type = "lib"] +#![feature(try_blocks)] +//#![feature(nll)] +#![feature(raw)] +#![feature(inner_deref)] +#![feature(is_sorted)] +#![feature(cell_update)] +#![feature(get_mut_unchecked)] +#![feature(specialization)] +#![feature(coerce_unsized)] +#![feature(unsize)] +#![feature(associated_type_defaults)] +#![warn(rust_2018_idioms)] +#![warn(missing_docs)] // warn if there are missing docs +#![warn(missing_debug_implementations)] +#![warn(trivial_numeric_casts)] + +//! # Antlr4 runtime +//! +//! **This is a pre-release version.** +//! **Some small breaking changes are still possible, although none is currently planned** +//! +//! This is a Rust runtime for the [ANTLR4] parser generator. +//! It is required to use parsers and lexers generated by the [ANTLR4] parser generator. +//! +//! This documentation covers the API used by generated parsers, lexers and syntax trees. +//! +//! For info on what [ANTLR4] is and how to generate a parser, please refer to: +//! - [ANTLR4] main repository +//! - [README] for Rust target +//! +//! [ANTLR4]: https://github.com/antlr/antlr4 +//! [README]: https://github.com/rrevenantt/antlr4rust/blob/master/README.md +//! +//! ### Customization +//! +//! All input and output can be customized and optimized for a particular use case by implementing +//! the related trait. Each of them already has different implementations that should be enough for most cases. +//! For more details see the docs for the corresponding trait and its containing module. +//! +//! Currently available are: +//! - [`CharStream`] - Lexer input, stream of char values with slicing support +//! - [`TokenFactory`] - How lexer creates tokens. +//! - [`Token`] - Element of [`TokenStream`] +//! - [`TokenStream`] - Parser input, created from lexer or other token source. +//! - [`ParserRuleContext`] - Node of created syntax tree. +//! +//! ### Zero-copy and lifetimes +//! +//!
This library supports full zero-copy parsing. To allow this, the +//! `'input` lifetime is used throughout the library to refer to data borrowed by the parser. +//! Besides a reference to the input, it can also be a [`TokenFactory`] if it returns references to tokens. +//! See [`ArenaFactory`] as an example of such behavior. It allocates tokens in [`Arena`](typed_arena::Arena) and returns references. +//! +//! When using the generated parse tree, be careful not to require a longer lifetime after parsing. +//! If you do, you will likely get a "does not live long enough" error on the input string, +//! even though the actual lifetime conflict happens much later. +//! +//! If you need to generate an owned version of the parse tree or you want simpler usage, +//! you can opt out of zero-copy by requiring `'input` to be static. In this case it is easier to also use +//! types that contain "owned" in their name or constructor function like `OwningTokenFactory` +//! or `InputStream::new_owned()`. +//! +//! ### Visitors and Listeners +//! +//! Parse listeners must outlive `'input` because they have to be stored inside of the parser. +//! This still allows retrieving borrowed data from the parse tree, which should be enough to cover 99% of use cases. +//! +//! `ParseTreeWalker` can accept listeners with arbitrary lifetime. +//! +//! Visitors can also have an arbitrary lifetime. +//! +//! ### Downcasting +//! +//! Rule context trait objects support downcasting even in the zero-copy case. +//! Generic types (currently these are `H:ErrorStrategy` and `I:`[`TokenStream`]) that you can +//! access in the generated parser from embedded actions can also be downcast to concrete types. +//! To do so, use the `TidExt::downcast_*` extension methods. +//! +//! [`CharStream`]: crate::char_stream::CharStream +//! [`TokenFactory`]: crate::token_factory::TokenFactory +//! [`ArenaFactory`]: crate::token_factory::ArenaFactory +//! [`Token`]: crate::token::Token +//! [`TokenStream`]: crate::token_stream::TokenStream +//!
[`ParserRuleContext`]: crate::parser_rule_context::ParserRuleContext + +#[macro_use] +extern crate lazy_static; + +#[doc(hidden)] +pub use lazy_static::lazy_static; + +#[doc(hidden)] +pub use better_any::{impl_tid, type_id, Tid, TidAble, TidExt}; + +#[doc(inline)] +pub use error_strategy::{BailErrorStrategy, DefaultErrorStrategy, ErrorStrategy}; +#[doc(inline)] +pub use input_stream::InputStream; +// #[doc(inline)] +// pub use input_stream::CodePointInputStream; +#[doc(inline)] +pub use lexer::{BaseLexer, Lexer}; +#[doc(inline)] +pub use parser::{BaseParser, ListenerId, Parser}; +//extern crate uuid; +#[doc(inline)] +pub use prediction_context::PredictionContextCache; + +pub mod atn_config; +pub mod atn_simulator; +pub mod int_stream; +mod lexer_action; +mod ll1_analyzer; +pub mod recognizer; +pub mod token_factory; +//pub mod tokenstream_rewriter; +#[doc(hidden)] +pub mod atn_deserialization_options; +#[doc(hidden)] +pub mod atn_state; +pub mod char_stream; +#[doc(hidden)] +pub mod dfa_state; +pub mod interval_set; +pub mod parser_rule_context; +mod prediction_context; +#[doc(hidden)] +pub mod semantic_context; +pub mod token_source; +pub mod token_stream; +//pub mod trace_listener; +#[doc(hidden)] +pub mod dfa; +#[doc(hidden)] +pub mod transition; +pub mod tree; +//pub mod file_stream; +#[doc(hidden)] +pub mod atn; +#[doc(hidden)] +pub mod atn_config_set; +#[doc(hidden)] +pub mod atn_deserializer; +pub mod common_token_stream; +mod dfa_serializer; +pub mod error_listener; +pub mod error_strategy; +pub mod errors; +mod input_stream; +pub mod lexer; +#[doc(hidden)] +pub mod lexer_action_executor; +pub mod lexer_atn_simulator; +pub mod parser; +pub mod parser_atn_simulator; +pub mod prediction_mode; +pub mod token; +pub mod trees; +mod utils; +//pub mod tokenstream_rewriter_test; +mod atn_type; +pub mod rule_context; +pub mod vocabulary; + +//#[cfg(test)] +// tests are either integration tests in "tests" folder or unit tests in some modules diff --git
a/src/parser_atn_simulator.rs b/src/parser_atn_simulator.rs new file mode 100644 index 0000000000..2054df9ae6 --- /dev/null +++ b/src/parser_atn_simulator.rs @@ -0,0 +1,1491 @@ +use std::borrow::{Borrow, BorrowMut}; +use std::cell::Cell; +use std::collections::{HashMap, HashSet}; +use std::hash::Hasher; +use std::marker::PhantomData; +use std::ops::Deref; +use std::rc::Rc; +use std::sync::Arc; +use std::{ptr, usize}; + +use bit_set::BitSet; +use typed_arena::Arena; + +use crate::atn::{ATN, INVALID_ALT}; +use crate::atn_config::ATNConfig; +use crate::atn_config_set::ATNConfigSet; +use crate::atn_simulator::{BaseATNSimulator, IATNSimulator}; +use crate::atn_state::ATNStateType::RuleStopState; +use crate::atn_state::{ATNDecisionState, ATNState, ATNStateRef, ATNStateType, ATNSTATE_BLOCK_END}; +use crate::dfa::{ScopeExt, DFA}; +use crate::dfa_state::{DFAState, DFAStateRef, PredPrediction}; +use crate::errors::{ANTLRError, NoViableAltError}; +use crate::int_stream::EOF; +use crate::interval_set::IntervalSet; +use crate::lexer_atn_simulator::ERROR_DFA_STATE_REF; +use crate::parser::{Parser, ParserNodeType}; +use crate::parser_rule_context::ParserRuleContext; +use crate::prediction_context::{ + MurmurHasherBuilder, PredictionContext, PredictionContextCache, EMPTY_PREDICTION_CONTEXT, + PREDICTION_CONTEXT_EMPTY_RETURN_STATE, +}; +use crate::prediction_mode::{ + all_subsets_conflict, all_subsets_equal, get_alts, get_conflicting_alt_subsets, + get_single_viable_alt, has_sll_conflict_terminating_prediction, + resolves_to_just_one_viable_alt, PredictionMode, +}; +use crate::rule_context::RuleContext; +use crate::semantic_context::SemanticContext; +use crate::token::{Token, TOKEN_EOF, TOKEN_EPSILON}; +use crate::token_factory::CommonTokenFactory; +use crate::token_stream::TokenStream; +use crate::transition::{ + ActionTransition, EpsilonTransition, PrecedencePredicateTransition, PredicateTransition, + RuleTransition, Transition, TransitionType, +}; + +/// ### The embodiment of 
the adaptive LL(*), ALL(*), parsing strategy. +/// +///

+/// The basic complexity of the adaptive strategy makes it harder to understand. +/// We begin with ATN simulation to build paths in a DFA. Subsequent prediction +/// requests go through the DFA first. If they reach a state without an edge for +/// the current symbol, the algorithm fails over to the ATN simulation to +/// complete the DFA path for the current input (until it finds a conflict state +/// or uniquely predicting state).

+/// +///

+/// All of that is done without using the outer context because we want to create +/// a DFA that is not dependent upon the rule invocation stack when we do a +/// prediction. One DFA works in all contexts. We avoid using context not +/// necessarily because it's slower, although it can be, but because of the DFA +/// caching problem. The closure routine only considers the rule invocation stack +/// created during prediction beginning in the decision rule. For example, if +/// prediction occurs without invoking another rule's ATN, there are no context +/// stacks in the configurations. When lack of context leads to a conflict, we +/// don't know if it's an ambiguity or a weakness in the strong LL(*) parsing +/// strategy (versus full LL(*)).

+/// +///

+/// When SLL yields a configuration set with conflict, we rewind the input and +/// retry the ATN simulation, this time using full outer context without adding +/// to the DFA. Configuration context stacks will be the full invocation stacks +/// from the start rule. If we get a conflict using full context, then we can +/// definitively say we have a true ambiguity for that input sequence. If we +/// don't get a conflict, it implies that the decision is sensitive to the outer +/// context. (It is not context-sensitive in the sense of context-sensitive +/// grammars.)

+/// +///

+/// The next time we reach this DFA state with an SLL conflict, through DFA +/// simulation, we will again retry the ATN simulation using full context mode. +/// This is slow because we can't save the results and have to "interpret" the +/// ATN each time we get that input.

+/// +/// **For more info see Java version** +pub struct ParserATNSimulator { + base: BaseATNSimulator, + prediction_mode: Cell, + start_index: Cell, + // pd:PhantomData

+} + +/// Just a local helper structure to spoil function parameters as little as possible +struct Local<'a, 'input, T: Parser<'input> + 'a> { + outer_context: Rc<>::Type>, + dfa: &'a DFA, + merge_cache: &'a mut MergeCache, + precedence: isize, + parser: &'a mut T, + pd: PhantomData>>, +} + +impl<'a, 'input, T: Parser<'input> + 'a> Local<'a, 'input, T> { + fn input(&mut self) -> &mut dyn TokenStream<'input, TF = T::TF> { + self.parser.get_input_stream_mut() + } + fn seek(&mut self, i: isize) { self.input().seek(i) } + fn outer_context(&self) -> &>::Type { + self.outer_context.deref() + } +} + +pub type MergeCache = HashMap< + (Arc, Arc), + Arc, + MurmurHasherBuilder, +>; + +impl ParserATNSimulator { + pub fn new( + atn: Arc, + decision_to_dfa: Arc>, + shared_context_cache: Arc, + ) -> ParserATNSimulator { + ParserATNSimulator { + base: BaseATNSimulator::new_base_atnsimulator( + atn, + decision_to_dfa, + shared_context_cache, + ), + prediction_mode: Cell::new(PredictionMode::LL), + start_index: Cell::new(0), + } + } + + pub fn get_prediction_mode(&self) -> PredictionMode { self.prediction_mode.get() } + + pub fn set_prediction_mode(&self, v: PredictionMode) { self.prediction_mode.set(v) } + + fn reset(&self) { unimplemented!() } + + pub fn adaptive_predict<'a, T: Parser<'a>>( + &self, + decision: isize, + parser: &mut T, + ) -> Result { + self.start_index.set(parser.get_input_stream_mut().index()); + let mut merge_cache: MergeCache = HashMap::with_hasher(MurmurHasherBuilder {}); + let mut local = Local { + outer_context: parser.get_parser_rule_context().clone(), + dfa: &self.decision_to_dfa()[decision as usize], + merge_cache: &mut merge_cache, + precedence: parser.get_precedence(), + parser, + pd: PhantomData, + }; + // 4!("adaptive_predict decision {}, is_prec {}",decision,local.dfa.is_precedence_dfa()); + + let m = local.input().mark(); + + let result = { + let s0 = if local.dfa.is_precedence_dfa() { + local + .dfa + .get_precedence_start_state(local.precedence 
/*parser.get_precedence()*/) + } else { + local.dfa.s0.read().unwrap().as_ref().copied() + }; + + let s0 = s0.unwrap_or_else(|| { + let s0_closure = self.compute_start_state( + local.dfa.atn_start_state, + // PredictionContext::from_rule_context::<'a,T::Node>(self.atn(), empty_ctx::().as_ref()), + EMPTY_PREDICTION_CONTEXT.clone(), + false, + &mut local, + ); + if local.dfa.is_precedence_dfa() { + let mut s0 = local.dfa.s0.read().unwrap().unwrap(); + let s0_closure_updated = self.apply_precedence_filter(&s0_closure, &mut local); + local.dfa.states.write().unwrap()[s0].configs = Box::new(s0_closure); + + s0 = self.add_dfastate( + &local.dfa, + DFAState::new_dfastate(0, Box::new(s0_closure_updated)), + ); + + local.dfa.set_precedence_start_state(local.precedence, s0); + s0 + } else { + let s0 = self + .add_dfastate(&local.dfa, DFAState::new_dfastate(0, Box::new(s0_closure))); + local.dfa.s0.write().unwrap().replace(s0); + s0 + } + }); + + self.exec_atn(&mut local, s0)? + }; + + local.input().seek(self.start_index.get()); + local.input().release(m); + // println!("result = {}", result); + Ok(result) + } + + #[allow(non_snake_case)] + fn exec_atn<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + s0: DFAStateRef, + ) -> Result { + let mut previousD = s0; + + let mut token = local.input().la(1); + loop { + // println!("exec atn loop previous D {}",previousD as isize -1); + let D = self + .get_existing_target_state(local.dfa, previousD, token) + .unwrap_or_else(|| self.compute_target_state(local.dfa, previousD, token, local)); + assert!(D > 0); + + let states = local.dfa.states.read().unwrap(); + if D == ERROR_DFA_STATE_REF { + let previousDstate = &states[previousD]; + let err = self.no_viable_alt( + local, + previousDstate.configs.as_ref(), + self.start_index.get(), + ); + local.input().seek(self.start_index.get()); + let alt = self.get_syn_valid_or_sem_invalid_alt_that_finished_decision_entry_rule( + previousDstate.configs.as_ref(), + local, + ); + if alt 
!= INVALID_ALT { + return Ok(alt); + } + return Err(err); + } + + let Dstate = &states[D]; + if Dstate.requires_full_context && self.prediction_mode.get() != PredictionMode::SLL { + let mut conflicting_alts = Dstate.configs.conflicting_alts.clone(); //todo get rid of clone? + if !Dstate.predicates.is_empty() { + let conflict_index = local.input().index(); + if conflict_index != self.start_index.get() { + local.input().seek(self.start_index.get()) + } + + conflicting_alts = self.eval_semantic_context(local, &Dstate.predicates, true); + // println!("conflicting_alts {:?}",&conflicting_alts); + if conflicting_alts.len() == 1 { + return Ok(conflicting_alts.iter().next().unwrap() as isize); + } + + if conflict_index != self.start_index.get() { + local.input().seek(conflict_index) + } + } + + let s0_closure = self.compute_start_state( + local.dfa.atn_start_state, + PredictionContext::from_rule_context::<'a, T::Node>( + self.atn(), + local.outer_context(), + ), + true, + local, + ); + + self.report_attempting_full_context( + local.dfa, + &conflicting_alts, + Dstate.configs.as_ref(), + self.start_index.get(), + local.input().index(), + local.parser, + ); + + return self.exec_atn_with_full_context(local, &Dstate, s0_closure); + } + + if Dstate.is_accept_state { + if Dstate.predicates.is_empty() { + // println!("prediction !!{}",Dstate.prediction); + return Ok(Dstate.prediction); + } + + let stop_index = local.input().index(); + local.input().seek(self.start_index.get()); + + let alts = self.eval_semantic_context(local, &Dstate.predicates, true); + match alts.len() { + 0 => { + return Err(self.no_viable_alt( + local, + Dstate.configs.as_ref(), + self.start_index.get(), + )) + } + 1 => return Ok(alts.iter().next().unwrap() as isize), + _ => { + self.report_ambiguity( + local.dfa, + self.start_index.get(), + stop_index, + false, + &alts, + Dstate.configs.as_ref(), + local.parser, + ); + return Ok(alts.iter().next().unwrap() as isize); + } + } + } + previousD = D; + + if token 
!= EOF { + local.input().consume(); + token = local.input().la(1); + } + } + } + + #[allow(non_snake_case)] + fn get_existing_target_state( + &self, + dfa: &DFA, + previousD: DFAStateRef, + t: isize, + ) -> Option { + dfa.states.read().unwrap()[previousD] + .edges + .get((t + 1) as usize) + .and_then(|x| match *x { + 0 => None, + x => Some(x), + }) + } + + #[allow(non_snake_case)] + fn compute_target_state<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + previousD: DFAStateRef, + t: isize, + local: &mut Local<'_, 'a, T>, + ) -> DFAStateRef { + // println!("source config {:?}",dfa.states.read().unwrap()[previousD].configs.as_ref()); + let reach = self.compute_reach_set( + dfa.states.read().unwrap()[previousD].configs.as_ref(), + t, + false, + local, + ); + let reach = match reach { + None => { + self.add_dfaedge( + dfa.states.write().unwrap()[previousD].borrow_mut(), + t, + ERROR_DFA_STATE_REF, + ); + return ERROR_DFA_STATE_REF; + } + Some(x) => x, + }; + + let predicted_alt = self.get_unique_alt(&reach); + // println!("predicted_alt {}",predicted_alt); + + let mut D = DFAState::new_dfastate(0, reach.into()); + let reach = D.configs.as_ref(); + + if predicted_alt != INVALID_ALT { + D.is_accept_state = true; + D.configs.set_unique_alt(predicted_alt); + D.prediction = predicted_alt + } else if self.all_configs_in_rule_stop_state(reach) + || has_sll_conflict_terminating_prediction(self.prediction_mode.get(), reach) + { + let alts = self.get_conflicting_alts(reach); + D.prediction = alts.iter().next().unwrap() as isize; + D.configs.conflicting_alts = alts; + D.requires_full_context = true; + D.is_accept_state = true; + } + + // println!("target config {:?}",&D.configs); + if D.is_accept_state && D.configs.has_semantic_context() { + let decision_state = self.atn().decision_to_state[dfa.decision as usize]; + self.predicate_dfa_state(&mut D, self.atn().states[decision_state].deref()); + // println!("predicates compute target {:?}",&D.predicates); + if !D.predicates.is_empty() { 
+ D.prediction = INVALID_ALT + } + } + + let D = self.add_dfastate(dfa, D); + self.add_dfaedge(dfa.states.write().unwrap()[previousD].borrow_mut(), t, D); + D + } + + fn predicate_dfa_state(&self, dfa_state: &mut DFAState, decision_state: &dyn ATNState) { + let nalts = decision_state.get_transitions().len(); + let alts_to_collect_preds_from = + self.get_conflicting_alts_or_unique_alt(dfa_state.configs.as_ref()); + let alt_to_pred = self.get_preds_for_ambig_alts( + &alts_to_collect_preds_from, + dfa_state.configs.as_ref(), + nalts, + ); + if let Some(alt_to_pred) = alt_to_pred { + dfa_state.predicates = + self.get_predicate_predictions(&alts_to_collect_preds_from, alt_to_pred); + dfa_state.prediction = INVALID_ALT; + } else { + dfa_state.prediction = alts_to_collect_preds_from + .iter() + .next() + .unwrap_or(0 /*in java it is -1 but looks like 0 is good enough*/) + as isize; + } + } + + fn exec_atn_with_full_context<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + _D: &DFAState, + s0: ATNConfigSet, + ) -> Result { + //println!("exec_atn_with_full_context"); + let full_ctx = true; + let mut found_exact_ambig = false; + let mut prev = s0; + local.input().seek(self.start_index.get()); + let mut t = local.input().la(1); + let mut predicted_alt = 0; + loop { + // println!("full_ctx loop"); + let reach = self.compute_reach_set(&prev, t, full_ctx, local); + prev = match reach { + None => { + local.input().seek(self.start_index.get()); + let alt = self + .get_syn_valid_or_sem_invalid_alt_that_finished_decision_entry_rule( + &prev, local, + ); + if alt != INVALID_ALT { + return Ok(alt); + } + return Err(self.no_viable_alt(local, &prev, self.start_index.get())); + } + Some(x) => x, + }; + + let alt_sub_sets = get_conflicting_alt_subsets(&prev); + prev.set_unique_alt(self.get_unique_alt(&prev)); + if prev.get_unique_alt() != INVALID_ALT { + predicted_alt = prev.get_unique_alt(); + break; + } + if self.prediction_mode.get() != 
PredictionMode::LL_EXACT_AMBIG_DETECTION { + predicted_alt = resolves_to_just_one_viable_alt(&alt_sub_sets); + if predicted_alt != INVALID_ALT { + break; + } + } else if all_subsets_conflict(&alt_sub_sets) && all_subsets_equal(&alt_sub_sets) { + found_exact_ambig = true; + predicted_alt = get_single_viable_alt(&alt_sub_sets); + break; + } + + if t != TOKEN_EOF { + local.input().consume(); + t = local.input().la(1); + } + } + + if prev.get_unique_alt() != INVALID_ALT { + self.report_context_sensitivity( + local.dfa, + predicted_alt, + &prev, + self.start_index.get(), + local.input().index(), + local.parser, + ); + return Ok(predicted_alt); + } + self.report_ambiguity( + local.dfa, + self.start_index.get(), + local.input().index(), + found_exact_ambig, + &prev.get_alts(), + &prev, + local.parser, + ); + + Ok(predicted_alt) + } + + // ATNConfigSet is pretty big so should be boxed to move it cheaper + fn compute_reach_set<'a, T: Parser<'a>>( + &self, + closure: &ATNConfigSet, + t: isize, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + // println!("in computeReachSet, starting closure: {:?}",closure); + let mut intermediate = ATNConfigSet::new_base_atnconfig_set(full_ctx); + + let mut skipped_stop_states = Vec::<&ATNConfig>::new(); + + for c in closure.get_items() { + let state = self.atn().states[c.get_state()].as_ref(); + if let RuleStopState = state.get_state_type() { + assert!(c.get_context().unwrap().is_empty()); + if full_ctx || t == TOKEN_EOF { + skipped_stop_states.push(c); + } + continue; + } + + for tr in state.get_transitions() { + self.get_reachable_target(tr.as_ref(), t).map(|target| { + let added = Box::new(c.cloned(self.atn().states[target].as_ref())); + intermediate.add_cached(added, Some(local.merge_cache)) + }); + } + } + // println!("intermediate {:?}",intermediate); + + let mut look_to_end_of_rule = false; + let mut reach = if skipped_stop_states.is_empty() + && t != TOKEN_EOF + && (intermediate.length() == 1 || 
self.get_unique_alt(&intermediate) != INVALID_ALT) + { + look_to_end_of_rule = true; + intermediate + } else { + let mut reach = ATNConfigSet::new_base_atnconfig_set(full_ctx); + let mut closure_busy = HashSet::new(); + // println!("calc reach {:?}",intermediate.length()); + + for c in intermediate.configs { + let treat_eofas_epsilon = t == TOKEN_EOF; + self.closure( + *c, + &mut reach, + &mut closure_busy, + false, + full_ctx, + treat_eofas_epsilon, + local, + ); + } + // println!("calc reach {:?}",reach); + reach + }; + + if t == TOKEN_EOF { + reach = self.remove_all_configs_not_in_rule_stop_state( + reach, + look_to_end_of_rule, + local.merge_cache, + ); + } + + if !skipped_stop_states.is_empty() + && (!full_ctx || !self.has_config_in_rule_stop_state(&reach)) + { + for c in skipped_stop_states { + reach.add_cached(c.clone().into(), Some(local.merge_cache)); + } + } + // println!("result?"); + if reach.is_empty() { + return None; + } + + // println!("result {:?}",&reach); + return Some(reach); + } + + fn has_config_in_rule_stop_state(&self, configs: &ATNConfigSet) -> bool { + for c in configs.get_items() { + if let RuleStopState = self.atn().states[c.get_state()].get_state_type() { + return true; + } + } + return false; + } + + fn all_configs_in_rule_stop_state(&self, configs: &ATNConfigSet) -> bool { + for c in configs.get_items() { + if let RuleStopState = self.atn().states[c.get_state()].get_state_type() { + } else { + return false; + } + } + return true; + } + + fn remove_all_configs_not_in_rule_stop_state( + &self, + configs: ATNConfigSet, + look_to_end_of_rule: bool, + merge_cache: &mut MergeCache, + ) -> ATNConfigSet { + if self.all_configs_in_rule_stop_state(&configs) { + return configs; + } + + // can just remove instead of creating new instance because we own configs + // it significantly differs from java version though + let mut result = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + for c in configs.configs { + let state = 
self.atn().states[c.get_state()].as_ref(); + if let RuleStopState = state.get_state_type() { + result.add_cached(c, Some(merge_cache)); + continue; + } + + if look_to_end_of_rule && state.has_epsilon_only_transitions() { + let next_tokens = self.atn().next_tokens(state); + if next_tokens.contains(TOKEN_EPSILON) { + let end_of_rule_state = self.atn().rule_to_stop_state[state.get_rule_index()]; + result.add_cached( + c.cloned(self.atn().states[end_of_rule_state].as_ref()) + .into(), + Some(merge_cache), + ); + } + } + } + + result + } + + fn compute_start_state<'a, T: Parser<'a>>( + &self, + a: ATNStateRef, + initial_ctx: Arc, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> ATNConfigSet { + // let initial_ctx = PredictionContext::prediction_context_from_rule_context(self.atn(),ctx); + let mut configs = ATNConfigSet::new_base_atnconfig_set(full_ctx); + // println!("initial {:?}",initial_ctx); + // println!("initial state {:?}",a); + + let atn_states = &self.atn().states; + for (i, tr) in atn_states[a].get_transitions().iter().enumerate() { + let target = &atn_states[tr.get_target()]; + let c = ATNConfig::new( + target.get_state_number(), + (i + 1) as isize, + Some(initial_ctx.clone()), + ); + let mut closure_busy = HashSet::new(); + self.closure( + c, + &mut configs, + &mut closure_busy, + true, + full_ctx, + false, + local, + ); + } + // println!("start state {:?}",configs); + + configs + } + + fn apply_precedence_filter<'a, T: Parser<'a>>( + &self, + configs: &ATNConfigSet, + local: &mut Local<'_, 'a, T>, + ) -> ATNConfigSet { + //println!("apply_precedence_filter"); + let mut states_from_alt1 = HashMap::new(); + let mut config_set = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + + for config in configs.get_items() { + if config.get_alt() != 1 { + continue; + } + + let updated_sem_ctx = config + .semantic_context + .eval_precedence(local.parser, local.outer_context()); + + if let Some(updated_sem_ctx) = updated_sem_ctx.as_deref() { + 
states_from_alt1.insert(config.get_state(), config.get_context()); + + if *updated_sem_ctx != *config.semantic_context { + config_set.add_cached( + Box::new(ATNConfig::new_with_semantic( + config.get_state(), + config.get_alt(), + config.get_context().cloned(), + Box::new(updated_sem_ctx.clone()), + )), + Some(local.merge_cache), + ); + } else { + config_set.add_cached(Box::new(config.clone()), Some(local.merge_cache)); + } + } + } + + for config in configs.get_items() { + if config.get_alt() == 1 { + continue; + } + if !config.is_precedence_filter_suppressed() { + if let Some(context) = states_from_alt1.get(&config.get_state()) { + if *context == config.get_context() { + continue; + } + } + } + config_set.add(Box::new(config.clone())); + } + + config_set + } + + fn get_reachable_target(&self, trans: &dyn Transition, ttype: isize) -> Option { + if trans.matches(ttype, 0, self.atn().max_token_type) { + return Some(trans.get_target()); + } + None + } + + fn get_preds_for_ambig_alts( + &self, + ambig_alts: &BitSet, + configs: &ATNConfigSet, + nalts: usize, + ) -> Option> { + let mut alt_to_pred = Vec::with_capacity(nalts + 1); + alt_to_pred.resize_with(nalts + 1, || None); + for c in configs.configs.iter() { + let alt = c.get_alt() as usize; + if ambig_alts.contains(alt) { + alt_to_pred[alt] = Some(SemanticContext::or( + alt_to_pred[alt].as_ref(), + Some(&*c.semantic_context), + )); + } + } + + let alt_to_pred: Vec = alt_to_pred + .into_iter() + .map(|it| { + if let Some(inner) = it { + inner + } else { + SemanticContext::NONE + } + }) + .collect(); + + let npred_alts = alt_to_pred + .iter() + .filter(|it| **it != SemanticContext::NONE) + .count(); + + if npred_alts == 0 { + return None; + } + return Some(alt_to_pred); + } + + fn get_predicate_predictions( + &self, + ambig_alts: &BitSet, + alt_to_pred: Vec, + ) -> Vec { + let mut pairs = vec![]; + let mut contains_predicate = false; + for (i, pred) in alt_to_pred.into_iter().enumerate().skip(1) { + if pred != 
SemanticContext::NONE { + contains_predicate = true + } + + if ambig_alts.contains(i) { + pairs.push(PredPrediction { + alt: i as isize, + pred, + }) + } + } + if !contains_predicate { + return Vec::new(); + } + + pairs + } + + fn get_syn_valid_or_sem_invalid_alt_that_finished_decision_entry_rule<'a, T: Parser<'a>>( + &self, + configs: &ATNConfigSet, + local: &mut Local<'_, 'a, T>, + ) -> isize { + let (sem_valid_configs, sem_invalid_configs) = + self.split_according_to_semantic_validity(configs, local); + + let alt = self.get_alt_that_finished_decision_entry_rule(&sem_valid_configs); + if alt != INVALID_ALT { + return alt; + } + + if !sem_invalid_configs.is_empty() { + let alt = self.get_alt_that_finished_decision_entry_rule(&sem_invalid_configs); + if alt != INVALID_ALT { + return alt; + } + } + + INVALID_ALT + } + + fn split_according_to_semantic_validity<'a, T: Parser<'a>>( + &self, + configs: &ATNConfigSet, + local: &mut Local<'_, 'a, T>, + ) -> (ATNConfigSet, ATNConfigSet) { + let mut succeeded = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + let mut failed = ATNConfigSet::new_base_atnconfig_set(configs.full_context()); + for c in configs.get_items() { + let clone = Box::new(c.clone()); + if *c.semantic_context != SemanticContext::NONE { + let predicate_eval_result = self.eval_predicate( + local, + &*c.semantic_context, + c.get_alt(), + configs.full_context(), + ); + if predicate_eval_result { + succeeded.add(clone); + } else { + failed.add(clone); + } + } else { + succeeded.add(clone); + } + } + (succeeded, failed) + } + + fn get_alt_that_finished_decision_entry_rule(&self, configs: &ATNConfigSet) -> isize { + let mut alts = IntervalSet::new(); + for c in configs.get_items() { + let has_empty_path = c.get_context().map(|x| x.has_empty_path()) == Some(true); + let is_stop = self.atn().states[c.get_state()].get_state_type() == &RuleStopState; + if c.get_reaches_into_outer_context() > 0 || (is_stop && has_empty_path) { + 
alts.add_one(c.get_alt()) + } + } + + return alts.get_min().unwrap_or(INVALID_ALT); + } + + fn eval_semantic_context<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + pred_predictions: &Vec, + complete: bool, + ) -> BitSet { + let mut predictions = BitSet::new(); + for pred in pred_predictions { + if pred.pred == SemanticContext::NONE { + predictions.insert(pred.alt as usize); + + if !complete { + break; + } + continue; + } + + let full_ctx = false; + let predicate_evaluation_result = + self.eval_predicate(local, &pred.pred, pred.alt, full_ctx); + + if predicate_evaluation_result { + predictions.insert(pred.alt as usize); + if !complete { + break; + } + } + } + predictions + } + + fn eval_predicate<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + pred: impl Borrow, + _alt: isize, + _full_ctx: bool, + ) -> bool { + pred.borrow().evaluate(local.parser, &*local.outer_context) + } + + fn closure<'a, T: Parser<'a>>( + &self, + config: ATNConfig, + configs: &mut ATNConfigSet, + closure_busy: &mut HashSet, + collect_predicates: bool, + full_ctx: bool, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) { + // println!("cl{}", config.get_state()); + let initial_depth = 0; + // local.merge_cache.clear(); + + self.closure_checking_stop_state( + config, + configs, + closure_busy, + collect_predicates, + full_ctx, + initial_depth, + treat_eofas_epsilon, + local, + ); + assert!(!full_ctx || !configs.get_dips_into_outer_context()) + } + + fn closure_checking_stop_state<'a, T: Parser<'a>>( + &self, + mut config: ATNConfig, + configs: &mut ATNConfigSet, + closure_busy: &mut HashSet, + collect_predicates: bool, + full_ctx: bool, + depth: isize, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) { + // println!("closure({:?})",config); + if let RuleStopState = self.atn().states[config.get_state()].get_state_type() { + if !config.get_context().unwrap().is_empty() { + config.get_context().unwrap().run(|temp| { + if 
temp.get_return_state(temp.length() - 1) + == PREDICTION_CONTEXT_EMPTY_RETURN_STATE + { + if full_ctx { + let new_config = config.cloned_with_new_ctx( + self.atn().states[config.get_state()].as_ref(), + Some(EMPTY_PREDICTION_CONTEXT.clone()), + ); + configs.add_cached(Box::new(new_config), Some(local.merge_cache)); + } else { + self.closure_work( + config.clone(), + configs, + closure_busy, + collect_predicates, + full_ctx, + depth, + treat_eofas_epsilon, + local, + ) + } + } + }); + let mut context = config.take_context(); + for i in 0..context.length() { + if context.get_return_state(i) == PREDICTION_CONTEXT_EMPTY_RETURN_STATE { + if i != context.length() - 1 { + panic!("EMPTY_RETURN_STATE is not last for some reason, please report error") + } + continue; + } + let return_state = context.get_return_state(i) as ATNStateRef; + // let new_ctx = context.take_parent(i).unwrap(); + let new_ctx = context.get_parent(i).cloned(); + let mut c = ATNConfig::new_with_semantic( + return_state, + config.get_alt(), + new_ctx, + config.semantic_context.clone(), + ); + c.set_reaches_into_outer_context(config.get_reaches_into_outer_context()); + assert!(depth > isize::min_value()); + self.closure_checking_stop_state( + c, + configs, + closure_busy, + collect_predicates, + full_ctx, + depth - 1, + treat_eofas_epsilon, + local, + ) + } + return; + } else if full_ctx { + configs.add_cached(Box::new(config), Some(local.merge_cache)); + return; + } else { + } + } + self.closure_work( + config, + configs, + closure_busy, + collect_predicates, + full_ctx, + depth, + treat_eofas_epsilon, + local, + ) + } + + fn closure_work<'a, T: Parser<'a>>( + &self, + config: ATNConfig, + configs: &mut ATNConfigSet, + closure_busy: &mut HashSet, + collect_predicates: bool, + full_ctx: bool, + depth: isize, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) { + //println!("depth {}",depth); + // println!("closure_work started {:?}",config); + let p = 
self.atn().states[config.get_state()].as_ref(); + if !p.has_epsilon_only_transitions() { + configs.add_cached(Box::new(config.clone()), Some(local.merge_cache)); + } + + for (i, tr) in p.get_transitions().iter().enumerate() { + if i == 0 && self.can_drop_loop_entry_edge_in_left_recursive_rule(&config) { + continue; + } + + let continue_collecting = tr.get_serialization_type() + != TransitionType::TRANSITION_ACTION + && collect_predicates; + let c = self.get_epsilon_target( + &config, + tr.as_ref(), + continue_collecting, + depth == 0, + full_ctx, + treat_eofas_epsilon, + local, + ); + if let Some(mut c) = c { + let mut new_depth = depth; + if let RuleStopState = self.atn().states[config.get_state()].get_state_type() { + assert!(!full_ctx); + + if local.dfa.is_precedence_dfa() { + let outermost_precedence_return = tr + .as_ref() + .cast::() + .outermost_precedence_return; + let atn_start_state = self.atn().states[local.dfa.atn_start_state].as_ref(); + if outermost_precedence_return == atn_start_state.get_rule_index() as isize + { + c.set_precedence_filter_suppressed(true); + } + } + + c.reaches_into_outer_context += 1; + if !closure_busy.insert(c.clone()) { + continue; + } + configs.set_dips_into_outer_context(true); + assert!(new_depth > isize::min_value()); + new_depth -= 1; + } else { + if !tr.is_epsilon() && !closure_busy.insert(c.clone()) { + continue; + } + + if tr.get_serialization_type() == TransitionType::TRANSITION_RULE { + if new_depth >= 0 { + new_depth += 1 + } + } + } + + self.closure_checking_stop_state( + c, + configs, + closure_busy, + continue_collecting, + full_ctx, + new_depth, + treat_eofas_epsilon, + local, + ) + }; + } + // println!("closure_work ended {:?}",config); + } + + fn can_drop_loop_entry_edge_in_left_recursive_rule(&self, _config: &ATNConfig) -> bool { + // if std::env::var("TURN_OFF_LR_LOOP_ENTRY_BRANCH_OPT").ok() + // .and_then(|it|str::parse::(&it).ok()) == Some(true) + // { return false } + + let state = 
self.atn().states[_config.get_state()].as_ref(); + + if let ATNStateType::DecisionState { + state: ATNDecisionState::StarLoopEntry { is_precedence, .. }, + .. + } = state.get_state_type() + { + if !*is_precedence + || _config.get_context().unwrap().is_empty() + || _config.get_context().unwrap().has_empty_path() + { + return false; + } + } else { + return false; + } + + let pred_ctx = _config.get_context().unwrap(); + let ctx_len = pred_ctx.length(); + for i in 0..ctx_len { + let return_state = self.atn().states[pred_ctx.get_return_state(i) as usize].as_ref(); + if return_state.get_rule_index() != state.get_rule_index() { + return false; + } + } + + let decision_start_state = state.get_transitions()[0].get_target(); + let decision_start_state = self.atn().states[decision_start_state].as_ref(); + let block_end_state_num = if let ATNStateType::DecisionState { + state: ATNDecisionState::BlockStartState { end_state, .. }, + .. + } = decision_start_state.get_state_type() + { + *end_state + } else { + unreachable!("cast error") + }; + + for i in 0..ctx_len { + let return_state = self.atn().states[pred_ctx.get_return_state(i) as usize].as_ref(); + if return_state.get_transitions().len() != 1 + || !return_state.get_transitions()[0].is_epsilon() + { + // println!("test1"); + return false; + } + let return_state_target = + self.atn().states[return_state.get_transitions()[0].get_target()].as_ref(); + if return_state.get_state_type_id() == ATNSTATE_BLOCK_END + && ptr::eq(return_state_target, state) + { + continue; + } + if return_state.get_state_number() == block_end_state_num { + continue; + } + if return_state_target.get_state_number() == block_end_state_num { + continue; + } + + if return_state_target.get_state_type_id() == ATNSTATE_BLOCK_END + && return_state_target.get_transitions().len() == 1 + && return_state_target.get_transitions()[0].is_epsilon() + && return_state_target.get_transitions()[0].get_target() == state.get_state_number() + { + continue; + } + // 
println!("test2"); + return false; + } + // println!("dropping on state {} ", state.get_state_number()); + + return true; + } + // + // fn get_rule_name(&self, index: isize) -> String { unimplemented!() } + + fn get_epsilon_target<'a, T: Parser<'a>>( + &self, + config: &ATNConfig, + t: &dyn Transition, + collect_predicates: bool, + in_context: bool, + full_ctx: bool, + treat_eofas_epsilon: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + match t.get_serialization_type() { + TransitionType::TRANSITION_EPSILON => { + Some(config.cloned(self.atn().states[t.get_target()].as_ref())) + } + TransitionType::TRANSITION_RULE => { + Some(self.rule_transition(config, t.cast::())) + } + TransitionType::TRANSITION_PREDICATE => self.pred_transition( + config, + t.cast::(), + collect_predicates, + in_context, + full_ctx, + local, + ), + TransitionType::TRANSITION_ACTION => { + Some(self.action_transition(config, t.cast::())) + } + TransitionType::TRANSITION_PRECEDENCE => self.precedence_transition( + config, + t.cast::(), + collect_predicates, + in_context, + full_ctx, + local, + ), + TransitionType::TRANSITION_ATOM + | TransitionType::TRANSITION_SET + | TransitionType::TRANSITION_RANGE => { + if treat_eofas_epsilon && t.matches(TOKEN_EOF, 0, 1) { + Some(config.cloned(self.atn().states[t.get_target()].as_ref())) + } else { + None + } + } + TransitionType::TRANSITION_NOTSET | TransitionType::TRANSITION_WILDCARD => None, + } + } + + fn action_transition(&self, config: &ATNConfig, t: &ActionTransition) -> ATNConfig { + config.cloned(self.atn().states[t.target].as_ref()) + } + + fn precedence_transition<'a, T: Parser<'a>>( + &self, + config: &ATNConfig, + pt: &PrecedencePredicateTransition, + collect_predicates: bool, + in_context: bool, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + let target = self.atn().states[pt.target].deref(); + if collect_predicates && in_context { + if full_ctx { + let curr_pos = local.input().index(); + 
local.input().seek(self.start_index.get()); + let prec_succeeds = self.eval_predicate( + local, + pt.get_predicate().unwrap(), + config.get_alt(), + full_ctx, + ); + local.input().seek(curr_pos); + if prec_succeeds { + return Some(config.cloned(target)); + } + } else { + let new_sem_ctx = + SemanticContext::and(Some(&*config.semantic_context), pt.get_predicate()); + return Some(config.cloned_with_new_semantic(target, Box::new(new_sem_ctx))); + } + } else { + return Some(config.cloned(target)); + } + + None + } + + fn pred_transition<'a, T: Parser<'a>>( + &self, + config: &ATNConfig, + pt: &PredicateTransition, + collect_predicates: bool, + in_context: bool, + full_ctx: bool, + local: &mut Local<'_, 'a, T>, + ) -> Option { + let target = self.atn().states[pt.target].deref(); + if collect_predicates && (!pt.is_ctx_dependent || (pt.is_ctx_dependent && in_context)) { + if full_ctx { + let curr_pos = local.input().index(); + local.input().seek(self.start_index.get()); + let prec_succeeds = self.eval_predicate( + local, + pt.get_predicate().unwrap(), + config.get_alt(), + full_ctx, + ); + local.input().seek(curr_pos); + if prec_succeeds { + return Some(config.cloned(target)); + } + } else { + let new_sem_ctx = + SemanticContext::and(Some(&*config.semantic_context), pt.get_predicate()); + return Some(config.cloned_with_new_semantic(target, Box::new(new_sem_ctx))); + } + } else { + return Some(config.cloned(target)); + } + + None + } + + fn rule_transition(&self, config: &ATNConfig, t: &RuleTransition) -> ATNConfig { + assert!(config.get_context().is_some()); + let new_ctx = PredictionContext::new_singleton( + config.get_context().cloned(), + t.follow_state as isize, + ); + config.cloned_with_new_ctx(self.atn().states[t.target].as_ref(), Some(new_ctx.into())) + } + + fn get_conflicting_alts(&self, configs: &ATNConfigSet) -> BitSet { + let altsets = get_conflicting_alt_subsets(configs); + get_alts(&altsets) + } + + //todo can return Cow + fn 
get_conflicting_alts_or_unique_alt(&self, configs: &ATNConfigSet) -> BitSet { + return if configs.get_unique_alt() != INVALID_ALT { + BitSet::new().modify_with(|it| { + it.insert(configs.get_unique_alt() as usize); + }) + } else { + configs.conflicting_alts.clone() + }; + } + // + // fn get_token_name(&self, t: isize) -> String { unimplemented!() } + // + // fn get_lookahead_name(&self, input: TokenStream) -> String { unimplemented!() } + // + // fn dump_dead_end_configs(&self, nvae: * NoViableAltError) { unimplemented!() } + // + fn no_viable_alt<'a, T: Parser<'a>>( + &self, + local: &mut Local<'_, 'a, T>, + _configs: &ATNConfigSet, + start_index: isize, + ) -> ANTLRError { + let start_token = local.parser.get_input_stream().get(start_index).borrow(); + let start_token = Token::to_owned(start_token); + let offending_token = local.input().lt(1).unwrap().borrow(); + let offending_token = Token::to_owned(offending_token); + ANTLRError::NoAltError(NoViableAltError::new_full( + local.parser, + start_token, + offending_token, + )) + } + + fn get_unique_alt(&self, configs: &ATNConfigSet) -> isize { + let mut alt = INVALID_ALT; + for c in configs.get_items() { + if alt == INVALID_ALT { + alt = c.get_alt() + } else if c.get_alt() != alt { + return INVALID_ALT; + } + } + + alt + } + + fn add_dfaedge(&self, from: &mut DFAState, t: isize, to: DFAStateRef) -> DFAStateRef { + if t < -1 || t > self.atn().max_token_type { + return to; + } + if from.edges.is_empty() { + from.edges.resize(self.atn().max_token_type as usize + 2, 0); + } + from.edges[(t + 1) as usize] = to; + + to + } + + fn add_dfastate(&self, dfa: &DFA, mut dfastate: DFAState) -> DFAStateRef { + if dfastate.state_number == ERROR_DFA_STATE_REF { + return ERROR_DFA_STATE_REF; + } + let mut states = dfa.states.write().unwrap(); + + let a = states.deref().len(); + dfastate.state_number = a; + + let key = dfastate.default_hash(); + //let mut new_hash = key; + if let Some(st) = 
dfa.states_map.write().unwrap().get_mut(&key) { + if let Some(&st) = st.iter().find(|&&it| states[it] == dfastate) { + return st; + } + } + + if !dfastate.configs.read_only() { + dfastate.configs.optimize_configs(self); + dfastate.configs.set_read_only(true); + // new_hash = dfastate.default_hash(); + } + + states.push(dfastate); + + // if key != new_hash { + dfa.states_map + .write() + .unwrap() + .entry(key) + .or_insert(Vec::new()) + .push(a); + // } + a + } + + fn report_attempting_full_context<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + conflicting_alts: &BitSet, + configs: &ATNConfigSet, + start_index: isize, + stop_index: isize, + parser: &mut T, + ) { + // let ambig_index = parser.get_current_token().get_token_index(); + parser + .get_error_lister_dispatch() + .report_attempting_full_context( + parser, + dfa, + start_index, + stop_index, + conflicting_alts, + configs, + ) + } + + fn report_context_sensitivity<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + prediction: isize, + configs: &ATNConfigSet, + start_index: isize, + stop_index: isize, + parser: &mut T, + ) { + parser + .get_error_lister_dispatch() + .report_context_sensitivity(parser, dfa, start_index, stop_index, prediction, configs) + } + + fn report_ambiguity<'a, T: Parser<'a>>( + &self, + dfa: &DFA, + start_index: isize, + stop_index: isize, + exact: bool, + ambig_alts: &BitSet, + configs: &ATNConfigSet, + parser: &mut T, + ) { + parser.get_error_lister_dispatch().report_ambiguity( + parser, + dfa, + start_index, + stop_index, + exact, + ambig_alts, + configs, + ) + } +} + +impl IATNSimulator for ParserATNSimulator { + fn shared_context_cache(&self) -> &PredictionContextCache { self.base.shared_context_cache() } + + fn atn(&self) -> &ATN { self.base.atn() } + + fn decision_to_dfa(&self) -> &Vec { self.base.decision_to_dfa() } +} diff --git a/src/token_factory.rs b/src/token_factory.rs new file mode 100644 index 0000000000..e0bacadea0 --- /dev/null +++ b/src/token_factory.rs @@ -0,0 +1,267 @@ +use 
std::borrow::Cow::{Borrowed, Owned}; +use std::borrow::{Borrow, BorrowMut, Cow}; +use std::cell::Cell; +use std::fmt::Debug; +use std::marker::{PhantomData, Unsize}; +use std::ops::{CoerceUnsized, Deref}; +use std::sync::atomic::AtomicIsize; + +use typed_arena::Arena; + +use crate::char_stream::{CharStream, InputData}; +use crate::token::Token; +use crate::token::{CommonToken, OwningToken, TOKEN_INVALID_TYPE}; +use better_any::{Tid, TidAble}; + +lazy_static! { + pub static ref CommonTokenFactoryDEFAULT: Box = + Box::new(CommonTokenFactory {}); + pub static ref INVALID_OWNING: Box = Box::new(OwningToken { + token_type: TOKEN_INVALID_TYPE, + channel: 0, + start: -1, + stop: -1, + token_index: AtomicIsize::new(-1), + line: -1, + column: -1, + text: "".to_owned(), + read_only: true, + }); + pub static ref INVALID_COMMON: Box> = Box::new(CommonToken { + token_type: TOKEN_INVALID_TYPE, + channel: 0, + start: -1, + stop: -1, + token_index: AtomicIsize::new(-1), + line: -1, + column: -1, + text: Borrowed(""), + read_only: true, + }); +} + +// todo remove redundant allocation for arenas + +/// Trait for creating tokens +pub trait TokenFactory<'a>: TidAble<'a> + Sized { + /// type of tokens emitted by this factory + type Inner: Token + ?Sized + 'a; + /// ownership of the emitted tokens + type Tok: Borrow + Clone + 'a + Debug; + // can relax InputData to just ToOwned here? + /// type of the underlying storage + type Data: InputData + ?Sized; + /// type of the reference to `Self::Data` that factory needs for producing tokens + type From: Borrow + Into>; + + /// Creates token + fn create( + &'a self, + source: Option<&mut T>, + ttype: isize, + text: Option<::Owned>, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized; + + /// Creates invalid token + /// Invalid tokens must have `TOKEN_INVALID_TYPE` token type. 
+ fn create_invalid() -> Self::Tok; +} + +#[derive(Default, Tid)] +pub struct CommonTokenFactory; + +impl Default for &'_ CommonTokenFactory { + fn default() -> Self { &**CommonTokenFactoryDEFAULT } +} + +impl<'a> TokenFactory<'a> for CommonTokenFactory { + type Inner = CommonToken<'a>; + type Tok = Box; + type Data = str; + type From = Cow<'a, str>; + + #[inline] + fn create( + &'a self, + source: Option<&mut T>, + ttype: isize, + text: Option, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized, + { + let text = match (text, source) { + (Some(t), _) => Owned(t), + (None, Some(x)) => { + if stop >= x.size() || start >= x.size() { + Borrowed("") + } else { + x.get_text(start, stop).into() + } + } + _ => Borrowed(""), + }; + Box::new(CommonToken { + token_type: ttype, + channel, + start, + stop, + token_index: AtomicIsize::new(-1), + line, + column, + text, + read_only: false, + }) + } + + fn create_invalid() -> Self::Tok { INVALID_COMMON.clone() } +} + +#[derive(Default, Tid)] +pub struct OwningTokenFactory; + +impl<'a> TokenFactory<'a> for OwningTokenFactory { + type Inner = OwningToken; + type Tok = Box; + type Data = str; + type From = String; + + #[inline] + fn create( + &'a self, + source: Option<&mut T>, + ttype: isize, + text: Option, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized, + { + let text = match (text, source) { + (Some(t), _) => t, + (None, Some(x)) => { + if stop >= x.size() || start >= x.size() { + "".to_owned() + } else { + x.get_text(start, stop) + } + } + _ => String::new(), + }; + Box::new(OwningToken { + token_type: ttype, + channel, + start, + stop, + token_index: AtomicIsize::new(-1), + line, + column, + text, + read_only: false, + }) + } + + fn create_invalid() -> Self::Tok { INVALID_OWNING.clone() } +} + +// pub struct DynFactory<'input,TF:TokenFactory<'.into()input>>(TF) where 
TF::Tok:CoerceUnsized>; +// impl <'input,TF:TokenFactory<'input>> TokenFactory<'input> for DynFactory<'input,TF> +// where TF::Tok:CoerceUnsized> +// { +// +// } + +pub type ArenaOwningFactory<'a> = ArenaFactory<'a, OwningTokenFactory, OwningToken>; +pub type ArenaCommonFactory<'a> = ArenaFactory<'a, CommonTokenFactory, CommonToken<'a>>; + +/// This is a wrapper for Token factory that allows to allocate tokens in separate arena. +/// It can allow to significantly improve performance by passing Tokens by references everywhere. +/// +/// Requires `&'a Tok: Default` bound to produce invalid tokens, which can be easily implemented +/// like this: +/// ```text +/// lazy_static!{ static ref INVALID_TOKEN:CustomToken = ... } +/// impl Default for &'_ CustomToken { +/// fn default() -> Self { &**INVALID_TOKEN } +/// } +/// ``` +// Box is used here because it is almost always should be used for token factory +#[derive(Tid)] +pub struct ArenaFactory<'input, TF, T> +where + TF: TokenFactory<'input, Tok = Box, Inner = T>, + T: Token + Clone + 'input, +{ + arena: Arena, + factory: TF, + pd: PhantomData<&'input str>, +} + +impl<'input, TF, T> Default for ArenaFactory<'input, TF, T> +where + TF: TokenFactory<'input, Tok = Box, Inner = T> + Default, + T: Token + Clone + 'input, +{ + fn default() -> Self { + Self { + arena: Default::default(), + factory: Default::default(), + pd: Default::default(), + } + } +} + +impl<'input, TF, Tok> TokenFactory<'input> for ArenaFactory<'input, TF, Tok> +where + TF: TokenFactory<'input, Tok = Box, Inner = Tok>, + Tok: Token + Clone + TidAble<'input>, + for<'a> &'a Tok: Default, +{ + type Inner = Tok; + type Tok = &'input Tok; + type Data = TF::Data; + type From = TF::From; + + #[inline] + fn create( + &'input self, + source: Option<&mut T>, + ttype: isize, + text: Option<::Owned>, + channel: isize, + start: isize, + stop: isize, + line: isize, + column: isize, + ) -> Self::Tok + where + T: CharStream + ?Sized, + { + let token = self + .factory + 
.create(source, ttype, text, channel, start, stop, line, column); + self.arena.alloc(*token) + } + + fn create_invalid() -> &'input Tok { <&Tok as Default>::default() } +} + +pub trait TokenAware<'input> { + type TF: TokenFactory<'input> + 'input; +} diff --git a/tests/gen/csvvisitor.rs b/tests/gen/csvvisitor.rs new file mode 100644 index 0000000000..0858cb5d2f --- /dev/null +++ b/tests/gen/csvvisitor.rs @@ -0,0 +1,61 @@ +#![allow(nonstandard_style)] +// Generated from CSV.g4 by ANTLR 4.8 +use super::csvparser::*; +use antlr_rust::parser_rule_context::RuleContextExt; +use antlr_rust::tree::{ParseTreeVisitor, VisitChildren}; + +/** + * This interface defines a complete generic visitor for a parse tree produced + * by {@link CSVParser}. + * + * @param The return type of the visit operation. Use {@link Void} for + * operations with no return type. + */ +pub trait CSVVisitor<'input>: ParseTreeVisitor<'input, CSVParserContextType> { + /** + * Visit a parse tree produced by {@link CSVParser#csvFile}. + * @param ctx the parse tree + */ + fn visit_csvFile(&mut self, ctx: &CsvFileContext<'input>) { self.visit_children(ctx) } + + /** + * Visit a parse tree produced by {@link CSVParser#hdr}. + * @param ctx the parse tree + */ + fn visit_hdr(&mut self, ctx: &HdrContext<'input>) { self.visit_children(ctx) } + + /** + * Visit a parse tree produced by {@link CSVParser#row}. + * @param ctx the parse tree + */ + fn visit_row(&mut self, ctx: &RowContext<'input>) { self.visit_children(ctx) } + + /** + * Visit a parse tree produced by {@link CSVParser#field}. + * @param ctx the parse tree + */ + fn visit_field(&mut self, ctx: &FieldContext<'input>) { self.visit_children(ctx) } + + // /// By default recursively visits all childrens of the node. + // /// Implement it if you want different default visiting logic. 
+ // fn visit_children(&mut self, node: &(dyn CSVParserContext<'input> + 'input)) {} +} + +//impl<'a,'input,T> CSVVisitor<'input> for T where T: CSVVisitor<'input> + 'a{ +// // default fn visit_csvFile(&mut self, ctx: &CsvFileContext<'input>){ +// self.visit_children(ctx) +// } +// default fn visit_hdr(&mut self, ctx: &HdrContext<'input>){ +// self.visit_children(ctx) +// } +// default fn visit_row(&mut self, ctx: &RowContext<'input>){ +// self.visit_children(ctx) +// } +// default fn visit_field(&mut self, ctx: &FieldContext<'input>){ +// self.visit_children(ctx) +// } +// +// default fn visit_children(&mut self, node: &(dyn CSVParserContext<'input> + 'input)){ +// node.accept_children(self as &mut (dyn CSVVisitor<'input> + 'a)) +// } +//} diff --git a/tests/my_test.rs b/tests/my_test.rs new file mode 100644 index 0000000000..06d567fa5c --- /dev/null +++ b/tests/my_test.rs @@ -0,0 +1,398 @@ +#![feature(try_blocks)] +#![feature(inner_deref)] +#![feature(specialization)] +#![feature(coerce_unsized)] +//! 
Integration tests + +// #[macro_use] +// extern crate lazy_static; + +mod gen { + use std::fmt::Write; + use std::io::Read; + use std::iter::FromIterator; + + use antlr_rust::common_token_stream::CommonTokenStream; + use antlr_rust::int_stream::IntStream; + use antlr_rust::lexer::Lexer; + use antlr_rust::parser_rule_context::{BaseParserRuleContext, ParserRuleContext}; + use antlr_rust::token::{Token, TOKEN_EOF}; + use antlr_rust::token_factory::{ArenaCommonFactory, CommonTokenFactory, OwningTokenFactory}; + use antlr_rust::token_stream::{TokenStream, UnbufferedTokenStream}; + use antlr_rust::tree::{ + ParseTree, ParseTreeListener, ParseTreeVisitor, ParseTreeWalker, TerminalNode, Tree, + VisitChildren, Visitable, + }; + use antlr_rust::InputStream; + use csvlexer::*; + use csvlistener::*; + use csvparser::CSVParser; + use referencetoatnlexer::ReferenceToATNLexer; + use referencetoatnlistener::ReferenceToATNListener; + use referencetoatnparser::ReferenceToATNParser; + use xmllexer::XMLLexer; + + use crate::gen::csvparser::{ + CSVParserContext, CSVParserContextType, CsvFileContext, HdrContext, RowContext, + }; + use crate::gen::csvvisitor::CSVVisitor; + use crate::gen::labelslexer::LabelsLexer; + use crate::gen::labelsparser::{AddContext, EContextAll, LabelsParser}; + use crate::gen::referencetoatnparser::{ + ReferenceToATNParserContext, ReferenceToATNParserContextType, + }; + use crate::gen::simplelrlexer::SimpleLRLexer; + use crate::gen::simplelrlistener::SimpleLRListener; + use crate::gen::simplelrparser::{ + SimpleLRParser, SimpleLRParserContext, SimpleLRParserContextType, SimpleLRTreeWalker, + }; + + mod csvlexer; + mod csvlistener; + mod csvparser; + mod csvvisitor; + mod referencetoatnlexer; + mod referencetoatnlistener; + mod referencetoatnparser; + mod simplelrlexer; + mod simplelrlistener; + mod simplelrparser; + mod xmllexer; + + fn test_static(arg: T) {} + + #[test] + fn lexer_test_xml() -> std::io::Result<()> { + let data = r#" +> + +"# + .to_owned(); + 
let mut _lexer = XMLLexer::new(InputStream::new(&*data)); + // _lexer.base.add_error_listener(); + let _a = "a".to_owned() + ""; + let mut string = String::new(); + { + let mut token_source = UnbufferedTokenStream::new_unbuffered(&mut _lexer); + while token_source.la(1) != TOKEN_EOF { + { + let token = token_source.lt(1).unwrap(); + + let len = token.get_stop() as usize + 1 - token.get_start() as usize; + string.extend( + format!( + "{},len {}:\n{}\n", + xmllexer::_SYMBOLIC_NAMES[token.get_token_type() as usize] + .unwrap_or(&format!("{}", token.get_token_type())), + len, + String::from_iter( + data.chars().skip(token.get_start() as usize).take(len) + ) + ) + .chars(), + ); + } + token_source.consume(); + } + } + println!("{}", string); + println!( + "{}", + _lexer + .get_interpreter() + .unwrap() + .get_dfa() + .to_lexer_string() + ); + Ok(()) + } + + #[test] + fn lexer_test_csv() { + println!("test started lexer_test_csv"); + let tf = ArenaCommonFactory::default(); + let mut _lexer = CSVLexer::new_with_token_factory( + InputStream::new("V123,V2\nd1,d222".into()), + // Box::new(UTF16InputStream::from_str("V123,V2\nd1,d222","".into())), + &tf, + ); + let mut token_source = UnbufferedTokenStream::new_buffered(_lexer); + let mut token_source_iter = token_source.token_iter(); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@0,0:3='V123',<5>,1:0]" + ); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@1,4:4=',',<1>,1:4]" + ); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@2,5:6='V2',<5>,1:5]" + ); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@3,7:7='\\n',<3>,1:7]" + ); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@4,8:9='d1',<5>,2:0]" + ); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@5,10:10=',',<1>,2:2]" + ); + assert_eq!( + token_source_iter.next().unwrap().to_string(), + "[@6,11:14='d222',<5>,2:3]" + ); + assert_eq!( + 
token_source_iter.next().unwrap().to_string(), + "[@7,15:14='',<-1>,2:7]" + ); + assert!(token_source_iter.next().is_none()); + } + + struct Listener {} + + impl<'input> ParseTreeListener<'input, CSVParserContextType> for Listener { + fn enter_every_rule(&mut self, ctx: &dyn CSVParserContext<'input>) { + println!( + "rule entered {}", + csvparser::ruleNames + .get(ctx.get_rule_index()) + .unwrap_or(&"error") + ) + } + } + + impl<'input> CSVListener<'input> for Listener {} + + #[test] + fn parser_test_csv() { + println!("test started"); + let tf = ArenaCommonFactory::default(); + let mut _lexer = + CSVLexer::new_with_token_factory(InputStream::new("V123,V2\nd1,d2\n".into()), &tf); + let token_source = CommonTokenStream::new(_lexer); + let mut parser = CSVParser::new(token_source); + parser.add_parse_listener(Box::new(Listener {})); + println!("\nstart parsing parser_test_csv"); + let result = parser.csvFile(); + assert!(result.is_ok()); + assert_eq!( + result.unwrap().to_string_tree(&*parser), + "(csvFile (hdr (row (field V123) , (field V2) \\n)) (row (field d1) , (field d2) \\n))" + ); + } + + struct Listener2 {} + + impl<'input> ParseTreeListener<'input, ReferenceToATNParserContextType> for Listener2 { + fn enter_every_rule(&mut self, ctx: &dyn ReferenceToATNParserContext<'input>) { + println!( + "rule entered {}", + referencetoatnparser::ruleNames + .get(ctx.get_rule_index()) + .unwrap_or(&"error") + ) + } + } + + impl<'input> ReferenceToATNListener<'input> for Listener2 {} + + static FACTORY: OwningTokenFactory = OwningTokenFactory; + + #[test] + fn test_adaptive_predict_and_owned_tree() { + let text = "a 34 b".to_owned(); + let mut _lexer = ReferenceToATNLexer::new_with_token_factory( + InputStream::new_owned(text.into_boxed_str()), + &FACTORY, + ); + let token_source = CommonTokenStream::new(_lexer); + let mut parser = ReferenceToATNParser::new(token_source); + parser.add_parse_listener(Box::new(Listener2 {})); + println!("\nstart parsing 
adaptive_predict_test"); + let result = parser.a(); + assert!(result.is_ok()); + test_static(result); + } + + struct Listener3; + + impl<'input> ParseTreeListener<'input, SimpleLRParserContextType> for Listener3 { + fn visit_terminal(&mut self, node: &TerminalNode<'input, SimpleLRParserContextType>) { + println!("terminal node {}", node.symbol.get_text()); + } + + fn enter_every_rule(&mut self, ctx: &dyn SimpleLRParserContext<'input>) { + println!( + "rule entered {}", + simplelrparser::ruleNames + .get(ctx.get_rule_index()) + .unwrap_or(&"error") + ) + } + + fn exit_every_rule(&mut self, ctx: &dyn SimpleLRParserContext<'input>) { + println!( + "rule exited {}", + simplelrparser::ruleNames + .get(ctx.get_rule_index()) + .unwrap_or(&"error") + ) + } + } + + impl<'input> SimpleLRListener<'input> for Listener3 {} + + #[test] + fn test_lr() { + let mut _lexer = SimpleLRLexer::new(InputStream::new("x y z".into())); + let token_source = CommonTokenStream::new(_lexer); + let mut parser = SimpleLRParser::new(token_source); + parser.add_parse_listener(Box::new(Listener3)); + println!("\nstart parsing lr_test"); + let result = parser.s().expect("failed recursion parsion"); + assert_eq!(result.to_string_tree(&*parser), "(s (a (a (a x) y) z))"); + } + + #[test] + fn test_immediate_lr() { + let mut _lexer = SimpleLRLexer::new(InputStream::new("x y z".into())); + let token_source = CommonTokenStream::new(_lexer); + let mut parser = SimpleLRParser::new(token_source); + parser.add_parse_listener(Box::new(Listener3)); + println!("\nstart parsing lr_test"); + let result = parser.a().expect("failed immediate recursion parsing"); + assert_eq!(result.to_string_tree(&*parser), "(a (a (a x) y) z)"); + } + + struct Listener4 { + data: String, + } + + impl<'input> ParseTreeListener<'input, SimpleLRParserContextType> for Listener4 { + fn visit_terminal(&mut self, node: &TerminalNode<'input, SimpleLRParserContextType>) { + println!("enter terminal"); + writeln!(&mut self.data, "terminal node 
{}", node.symbol.get_text()); + } + fn enter_every_rule(&mut self, ctx: &dyn SimpleLRParserContext<'input>) { + println!( + "rule entered {}", + simplelrparser::ruleNames + .get(ctx.get_rule_index()) + .unwrap_or(&"error") + ) + } + } + + impl<'input> SimpleLRListener<'input> for Listener4 {} + + #[test] + fn test_remove_listener() { + let mut _lexer = SimpleLRLexer::new(InputStream::new("x y z".into())); + let token_source = CommonTokenStream::new(_lexer); + let mut parser = SimpleLRParser::new(token_source); + parser.add_parse_listener(Box::new(Listener3)); + let id = parser.add_parse_listener(Box::new(Listener4 { + data: String::new(), + })); + let result = parser.s().expect("expected to parse successfully"); + + let mut listener = parser.remove_parse_listener(id); + assert_eq!( + &listener.data, + "terminal node x\nterminal node y\nterminal node z\n" + ); + + println!("--------"); + listener.data.clear(); + + let listener = SimpleLRTreeWalker::walk(listener, &*result); + assert_eq!( + &listener.data, + "terminal node x\nterminal node y\nterminal node z\n" + ); + } + + #[test] + fn test_byte_parser() {} + + mod labelslexer; + mod labelslistener; + mod labelsparser; + + #[test] + fn test_complex_convert() { + let codepoints = "(a+4)*2".chars().map(|x| x as u32).collect::>(); + // let codepoints = "(a+4)*2"; + let input = InputStream::new(&*codepoints); + let mut lexer = LabelsLexer::new(input); + let token_source = CommonTokenStream::new(lexer); + let mut parser = LabelsParser::new(token_source); + let result = parser.s().expect("parser error"); + let string = result.q.as_ref().unwrap().get_v(); + assert_eq!("* + a 4 2", string); + let x = result.q.as_deref().unwrap(); + match x { + EContextAll::MultContext(x) => assert_eq!("(a+4)", x.a.as_ref().unwrap().get_text()), + _ => panic!("oops"), + } + } + + struct MyCSVVisitor<'i, T>(Vec<&'i str>, T); + + impl<'i, T> ParseTreeVisitor<'i, CSVParserContextType> for MyCSVVisitor<'i, T> { + fn visit_terminal(&mut self, 
node: &TerminalNode<'i, CSVParserContextType>) { + if node.symbol.get_token_type() == csvparser::TEXT { + if let Cow::Borrowed(s) = node.symbol.text { + self.0.push(s); + } + } + } + } + + use csvparser::RowContextAttrs; + use std::borrow::Cow; + use std::rc::Rc; + + impl<'i, T> CSVVisitor<'i> for MyCSVVisitor<'i, T> { + fn visit_hdr(&mut self, ctx: &HdrContext<'i>) {} + + fn visit_row(&mut self, ctx: &RowContext<'i>) { + if ctx.field_all().len() > 1 { + self.visit_children(ctx) + } + } + } + + // tests zero-copy parsing with non static visitor + #[test] + fn test_visitor() { + fn parse<'a>(tf: &'a ArenaCommonFactory<'a>) -> Rc> { + let mut _lexer = + CSVLexer::new_with_token_factory(InputStream::new("h1,h2\nd1,d2\nd3\n".into()), tf); + let token_source = CommonTokenStream::new(_lexer); + let mut parser = CSVParser::new(token_source); + let result = parser.csvFile().expect("parsed unsuccessfully"); + + let mut test = 5; + let mut visitor = MyCSVVisitor(Vec::new(), &mut test); + result.accept(&mut visitor); + assert_eq!(visitor.0, vec!["d1", "d2"]); + + result + } + let tf = ArenaCommonFactory::default(); + + let result = parse(&tf); + } +} diff --git a/tests/perf.rs b/tests/perf.rs new file mode 100644 index 0000000000..7ef37e29d7 --- /dev/null +++ b/tests/perf.rs @@ -0,0 +1,51 @@ +#![feature(try_blocks)] +#![feature(inner_deref)] +#![feature(test)] +#[macro_use] +extern crate lazy_static; +extern crate test; + +mod gen { + use test::Bencher; + + use antlr_rust::common_token_stream::CommonTokenStream; + use antlr_rust::InputStream; + + // use crate::gen::perflexer::PerfLexer; + // use crate::gen::perfparser::PerfParser; + // mod perflexer; + // mod perfparser; + // mod perflistener; + + // #[cfg(not(debug_assertions))] + // #[test] + // fn performance(){ + #[ignore] + // #[bench] + fn performance(b: &mut Bencher) { + // b.iter(|| { + // let lexer = PerfLexer::new(Box::new(InputStream::new(input.to_string()))); + // let source = CommonTokenStream::new(lexer); + // 
let mut parser = PerfParser::new(Box::new(source)); + // let result = parser.stat().expect("oops"); + // }); + } + + const input: &str = "\ + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 and + between X1 and X2 or between X3 and X4 + ;"; +} diff --git a/tool/resources/org/antlr/v4/tool/templates/codegen/Rust/Rust.stg b/tool/resources/org/antlr/v4/tool/templates/codegen/Rust/Rust.stg new file mode 120000 index 0000000000..c0219d6dab --- /dev/null +++ b/tool/resources/org/antlr/v4/tool/templates/codegen/Rust/Rust.stg @@ -0,0 +1 @@ +../../../../../../../../../runtime/Rust/templates/Rust.stg \ No newline at end of file diff --git a/tool/src/org/antlr/v4/codegen/OutputModelController.java b/tool/src/org/antlr/v4/codegen/OutputModelController.java index 5126670d39..e7e1fd06f7 100644 --- a/tool/src/org/antlr/v4/codegen/OutputModelController.java +++ b/tool/src/org/antlr/v4/codegen/OutputModelController.java @@ -8,36 +8,13 @@ import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.v4.analysis.LeftRecursiveRuleAltInfo; -import org.antlr.v4.codegen.model.Action; -import org.antlr.v4.codegen.model.AltBlock; -import org.antlr.v4.codegen.model.BaseListenerFile; -import org.antlr.v4.codegen.model.BaseVisitorFile; -import org.antlr.v4.codegen.model.Choice; -import org.antlr.v4.codegen.model.CodeBlockForAlt; -import org.antlr.v4.codegen.model.CodeBlockForOuterMostAlt; -import 
org.antlr.v4.codegen.model.LabeledOp; -import org.antlr.v4.codegen.model.LeftRecursiveRuleFunction; -import org.antlr.v4.codegen.model.Lexer; -import org.antlr.v4.codegen.model.LexerFile; -import org.antlr.v4.codegen.model.ListenerFile; -import org.antlr.v4.codegen.model.OutputModelObject; -import org.antlr.v4.codegen.model.Parser; -import org.antlr.v4.codegen.model.ParserFile; -import org.antlr.v4.codegen.model.RuleActionFunction; -import org.antlr.v4.codegen.model.RuleFunction; -import org.antlr.v4.codegen.model.RuleSempredFunction; -import org.antlr.v4.codegen.model.SrcOp; -import org.antlr.v4.codegen.model.StarBlock; -import org.antlr.v4.codegen.model.VisitorFile; +import org.antlr.v4.codegen.model.*; import org.antlr.v4.codegen.model.decl.CodeBlock; import org.antlr.v4.misc.Utils; import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.parse.GrammarASTAdaptor; -import org.antlr.v4.tool.Alternative; -import org.antlr.v4.tool.ErrorType; -import org.antlr.v4.tool.Grammar; -import org.antlr.v4.tool.LeftRecursiveRule; -import org.antlr.v4.tool.Rule; +import org.antlr.v4.semantics.UseDefAnalyzer; +import org.antlr.v4.tool.*; import org.antlr.v4.tool.ast.ActionAST; import org.antlr.v4.tool.ast.BlockAST; import org.antlr.v4.tool.ast.GrammarAST; @@ -171,13 +148,14 @@ public void buildRuleFunction(Parser parser, Rule r) { Grammar g = getGrammar(); for (ActionAST a : r.actions) { if ( a instanceof PredAST ) { - PredAST p = (PredAST)a; + PredAST p = (PredAST) a; RuleSempredFunction rsf = parser.sempredFuncs.get(r); - if ( rsf==null ) { + if (rsf == null) { rsf = new RuleSempredFunction(delegate, r, function.ctxType); parser.sempredFuncs.put(r, rsf); } - rsf.actions.put(g.sempreds.get(p), new Action(delegate, p)); + boolean isCtxDependent = UseDefAnalyzer.actionIsContextDependent(p); + rsf.actions.put(g.sempreds.get(p), new Action(delegate, p, isCtxDependent)); } } diff --git a/tool/src/org/antlr/v4/codegen/Target.java b/tool/src/org/antlr/v4/codegen/Target.java index 
ec6d08f9ae..13117d45b2 100644 --- a/tool/src/org/antlr/v4/codegen/Target.java +++ b/tool/src/org/antlr/v4/codegen/Target.java @@ -18,16 +18,9 @@ import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.ast.GrammarAST; -import org.stringtemplate.v4.NumberRenderer; -import org.stringtemplate.v4.ST; -import org.stringtemplate.v4.STErrorListener; -import org.stringtemplate.v4.STGroup; -import org.stringtemplate.v4.STGroupFile; -import org.stringtemplate.v4.StringRenderer; +import org.stringtemplate.v4.*; import org.stringtemplate.v4.misc.STMessage; -import java.net.URL; - /** */ public abstract class Target { /** For pure strings of Java 16-bit Unicode char, how can we display @@ -560,6 +553,11 @@ private void reportError(STMessage msg) { return result; } + // Override if target need to preprocess actions, for example support escape sequences + public String processActionText(String text) { + return text; + } + /** * @since 4.3 */ diff --git a/tool/src/org/antlr/v4/codegen/model/Action.java b/tool/src/org/antlr/v4/codegen/model/Action.java index ada06aee52..dd0de6862b 100644 --- a/tool/src/org/antlr/v4/codegen/model/Action.java +++ b/tool/src/org/antlr/v4/codegen/model/Action.java @@ -20,17 +20,26 @@ import java.util.ArrayList; import java.util.List; -/** */ +/** + * + */ public class Action extends RuleElement { - @ModelElement public List chunks; + @ModelElement + public List chunks; + public boolean isCtxDependent = false; + + // Rust target needs to know if ctx is null for safe casting + public Action(OutputModelFactory factory, ActionAST ast, boolean needsCtx) { + this(factory, ast); + isCtxDependent = needsCtx; + } public Action(OutputModelFactory factory, ActionAST ast) { - super(factory,ast); + super(factory, ast); RuleFunction rf = factory.getCurrentRuleFunction(); if (ast != null) { chunks = ActionTranslator.translateAction(factory, rf, ast.token, ast); - } - else { + } else { chunks = new ArrayList(); } 
//System.out.println("actions="+chunks); diff --git a/tool/src/org/antlr/v4/codegen/model/OutputFile.java b/tool/src/org/antlr/v4/codegen/model/OutputFile.java index cd233246bb..4b28009bf3 100644 --- a/tool/src/org/antlr/v4/codegen/model/OutputFile.java +++ b/tool/src/org/antlr/v4/codegen/model/OutputFile.java @@ -6,8 +6,10 @@ package org.antlr.v4.codegen.model; +import org.antlr.runtime.CommonToken; import org.antlr.v4.Tool; import org.antlr.v4.codegen.OutputModelFactory; +import org.antlr.v4.parse.ANTLRParser; import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.ast.ActionAST; @@ -35,7 +37,9 @@ public Map buildNamedActions(Grammar g) { Map namedActions = new HashMap(); for (String name : g.namedActions.keySet()) { ActionAST ast = g.namedActions.get(name); - namedActions.put(name, new Action(factory, ast)); + String processedText = factory.getGenerator().getTarget().processActionText(ast.getText()); + ActionAST action = new ActionAST(new CommonToken(ANTLRParser.ACTION, processedText)); + namedActions.put(name, new Action(factory, action)); } return namedActions; } diff --git a/tool/src/org/antlr/v4/codegen/model/RuleFunction.java b/tool/src/org/antlr/v4/codegen/model/RuleFunction.java index d6b745f0ad..ff7ae2e86d 100644 --- a/tool/src/org/antlr/v4/codegen/model/RuleFunction.java +++ b/tool/src/org/antlr/v4/codegen/model/RuleFunction.java @@ -10,16 +10,7 @@ import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.CommonTreeNodeStream; import org.antlr.v4.codegen.OutputModelFactory; -import org.antlr.v4.codegen.model.decl.AltLabelStructDecl; -import org.antlr.v4.codegen.model.decl.AttributeDecl; -import org.antlr.v4.codegen.model.decl.ContextRuleGetterDecl; -import org.antlr.v4.codegen.model.decl.ContextRuleListGetterDecl; -import org.antlr.v4.codegen.model.decl.ContextRuleListIndexedGetterDecl; -import org.antlr.v4.codegen.model.decl.ContextTokenGetterDecl; -import org.antlr.v4.codegen.model.decl.ContextTokenListGetterDecl; -import 
org.antlr.v4.codegen.model.decl.ContextTokenListIndexedGetterDecl; -import org.antlr.v4.codegen.model.decl.Decl; -import org.antlr.v4.codegen.model.decl.StructDecl; +import org.antlr.v4.codegen.model.decl.*; import org.antlr.v4.misc.FrequencySet; import org.antlr.v4.misc.Utils; import org.antlr.v4.parse.GrammarASTAdaptor; @@ -35,18 +26,9 @@ import org.antlr.v4.tool.ast.GrammarAST; import org.antlr.v4.tool.ast.PredAST; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; -import static org.antlr.v4.parse.ANTLRParser.RULE_REF; -import static org.antlr.v4.parse.ANTLRParser.STRING_LITERAL; -import static org.antlr.v4.parse.ANTLRParser.TOKEN_REF; +import static org.antlr.v4.parse.ANTLRParser.*; /** */ public class RuleFunction extends OutputModelObject { @@ -91,8 +73,8 @@ public RuleFunction(OutputModelFactory factory, Rule r) { Collection decls = r.args.attributes.values(); if ( decls.size()>0 ) { args = new ArrayList(); - ruleCtx.addDecls(decls); for (Attribute a : decls) { + ruleCtx.addDecl(new AttributeDecl(factory, a, true)); args.add(new AttributeDecl(factory, a)); } ruleCtx.ctorAttrs = args; diff --git a/tool/src/org/antlr/v4/codegen/model/decl/AttributeDecl.java b/tool/src/org/antlr/v4/codegen/model/decl/AttributeDecl.java index 04367523b5..5f5bdff20f 100644 --- a/tool/src/org/antlr/v4/codegen/model/decl/AttributeDecl.java +++ b/tool/src/org/antlr/v4/codegen/model/decl/AttributeDecl.java @@ -9,10 +9,19 @@ import org.antlr.v4.codegen.OutputModelFactory; import org.antlr.v4.tool.Attribute; -/** */ +/** + * + */ public class AttributeDecl extends Decl { public String type; public String initValue; + public boolean initFromConstructor = false; + + public AttributeDecl(OutputModelFactory factory, Attribute a, boolean initFromConstructor) { + this(factory, a); + this.initFromConstructor 
= initFromConstructor; + } + public AttributeDecl(OutputModelFactory factory, Attribute a) { super(factory, a.name, a.decl); this.type = a.type; diff --git a/tool/src/org/antlr/v4/codegen/model/decl/StructDecl.java b/tool/src/org/antlr/v4/codegen/model/decl/StructDecl.java index f7a48ee5f4..49f9350cb4 100644 --- a/tool/src/org/antlr/v4/codegen/model/decl/StructDecl.java +++ b/tool/src/org/antlr/v4/codegen/model/decl/StructDecl.java @@ -7,11 +7,7 @@ package org.antlr.v4.codegen.model.decl; import org.antlr.v4.codegen.OutputModelFactory; -import org.antlr.v4.codegen.model.DispatchMethod; -import org.antlr.v4.codegen.model.ListenerDispatchMethod; -import org.antlr.v4.codegen.model.ModelElement; -import org.antlr.v4.codegen.model.OutputModelObject; -import org.antlr.v4.codegen.model.VisitorDispatchMethod; +import org.antlr.v4.codegen.model.*; import org.antlr.v4.runtime.misc.OrderedHashSet; import org.antlr.v4.tool.Attribute; import org.antlr.v4.tool.Rule; @@ -25,7 +21,7 @@ */ public class StructDecl extends Decl { public String derivedFromName; // rule name or label name - public boolean provideCopyFrom; + public boolean provideCopyFrom; // can be used to check if there are named Alts @ModelElement public OrderedHashSet attrs = new OrderedHashSet(); @ModelElement public OrderedHashSet getters = new OrderedHashSet(); @ModelElement public Collection ctorAttrs; @@ -42,6 +38,9 @@ public class StructDecl extends Decl { public OrderedHashSet ruleContextDecls = new OrderedHashSet(); public OrderedHashSet ruleContextListDecls = new OrderedHashSet(); public OrderedHashSet attributeDecls = new OrderedHashSet(); + // Required to be able to differently initialize attributes that come from constructor + // required for Rust target + public OrderedHashSet notCtorAttrs = new OrderedHashSet(); public StructDecl(OutputModelFactory factory, Rule r) { super(factory, factory.getGenerator().getTarget().getRuleFunctionContextStructName(r)); @@ -88,6 +87,10 @@ else if ( d instanceof 
RuleContextDecl ) { } else if ( d instanceof AttributeDecl ) { attributeDecls.add(d); + AttributeDecl attr = (AttributeDecl) d; + if (!attr.initFromConstructor) { + notCtorAttrs.add(attr); + } } } diff --git a/tool/src/org/antlr/v4/codegen/target/RustTarget.java b/tool/src/org/antlr/v4/codegen/target/RustTarget.java new file mode 100644 index 0000000000..391e7acd44 --- /dev/null +++ b/tool/src/org/antlr/v4/codegen/target/RustTarget.java @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +package org.antlr.v4.codegen.target; + +import org.antlr.v4.codegen.CodeGenerator; +import org.antlr.v4.codegen.Target; +import org.antlr.v4.codegen.UnicodeEscapes; +import org.antlr.v4.parse.ANTLRParser; +import org.antlr.v4.tool.ErrorType; +import org.antlr.v4.tool.Grammar; +import org.antlr.v4.tool.ast.GrammarAST; +import org.stringtemplate.v4.ST; +import org.stringtemplate.v4.STErrorListener; +import org.stringtemplate.v4.STGroup; +import org.stringtemplate.v4.StringRenderer; +import org.stringtemplate.v4.misc.STMessage; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Locale; +import java.util.Set; + +public class RustTarget extends Target { + + protected static final String[] rustKeywords = { + "_", "abstract", "alignof", "as", "become ", + "box", "break", "const", "continue", "crate", + "do", "else", "enum", "extern", "false", + "final", "fn", "for", "if", "impl", + "in", "let", "loop", "macro", "match", + "mod", "move", "mut", "offsetof", "override", + "priv", "proc", "pub", "pure", "ref", + "return", "Self", "self", "sizeof", "static", + "struct", "super", "trait", "true", "type", + "typeof", "unsafe", "unsized", "use", "virtual", + "where", "while", "yield" + }; + + /** + * Avoid grammar symbols in this set to prevent conflicts in gen'd code. 
+ */ + protected final Set badWords = new HashSet(); + + public RustTarget(CodeGenerator gen) { + super(gen, "Rust"); + } + + public String getVersion() { + return "4.8"; + } + + public Set getBadWords() { + if (badWords.isEmpty()) { + addBadWords(); + } + + return badWords; + } + + protected void addBadWords() { + badWords.addAll(Arrays.asList(rustKeywords)); + badWords.add("rule"); + badWords.add("parserRule"); + } + + @Override + public String encodeIntAsCharEscape(int v) { + + if (v < Character.MIN_VALUE || v > Character.MAX_VALUE) { + throw new IllegalArgumentException(String.format("Cannot encode the specified value: %d", v)); + } + +// if (v >= 0 && v < targetCharValueEscape.length && targetCharValueEscape[v] != null) { +// return targetCharValueEscape[v]; +// } + +// if (v >= 0x20 && v < 127 && (!Character.isDigit(v) || v == '8' || v == '9')) { +// return String.valueOf((char)v); +// } + + if (v >= 0 && v <= 127) { + String oct = Integer.toHexString(v | 0x100).substring(1, 3); + return "\\x" + oct; + } + + //encode surrogates + if (v >= 0xD800 && v <= 0xDFFF) { + v += 0x3000; + } + + String hex = Integer.toHexString(v); + return "\\u{" + hex + "}"; + } + + @Override + public String getRecognizerFileName(boolean header) { + Grammar g = getCodeGenerator().g; + assert g != null; + String name; + switch (g.getType()) { + case ANTLRParser.PARSER: + name = g.name.endsWith("Parser") ? g.name.substring(0, g.name.length() - 6) : g.name; + name = name.toLowerCase() + "parser"; + break; + case ANTLRParser.LEXER: + name = g.name.endsWith("Lexer") ? 
g.name.substring(0, g.name.length() - 5) : g.name; // trim off "lexer" + name = name.toLowerCase() + "lexer"; + break; + case ANTLRParser.COMBINED: + name = g.name.toLowerCase() + "parser"; + break; + default: + return "INVALID_FILE_NAME"; + } + ST extST = getTemplates().getInstanceOf("codeFileExtension"); + return name + extST.render(); + } + + @Override + public String getListenerFileName(boolean header) { + assert gen.g.name != null; + ST extST = getTemplates().getInstanceOf("codeFileExtension"); + String listenerName = gen.g.name.toLowerCase() + "listener"; + return listenerName + extST.render(); + } + + @Override + public String getVisitorFileName(boolean header) { + assert gen.g.name != null; + ST extST = getTemplates().getInstanceOf("codeFileExtension"); + String listenerName = gen.g.name.toLowerCase() + "visitor"; + return listenerName + extST.render(); + } + + @Override + public String getBaseListenerFileName(boolean header) { + assert gen.g.name != null; + ST extST = getTemplates().getInstanceOf("codeFileExtension"); + String listenerName = gen.g.name + "_baseListener"; + return listenerName + extST.render(); + } + + @Override + public String getBaseVisitorFileName(boolean header) { + assert gen.g.name != null; + ST extST = getTemplates().getInstanceOf("codeFileExtension"); + String listenerName = gen.g.name + "_baseVisitor"; + return listenerName + extST.render(); + } + + @Override + protected boolean visibleGrammarSymbolCausesIssueInGeneratedCode(GrammarAST idNode) { + return getBadWords().contains(idNode.getText()); + } + + @Override + protected STGroup loadTemplates() { + STGroup result = super.loadTemplates(); +// result.registerRenderer(Integer.class, new NumberRenderer()); + result.registerRenderer(String.class, new RustStringRenderer(), true); + result.setListener(new STErrorListener() { + @Override + public void compileTimeError(STMessage msg) { + reportError(msg); + } + + @Override + public void runTimeError(STMessage msg) { + reportError(msg); 
+ } + + @Override + public void IOError(STMessage msg) { + reportError(msg); + } + + @Override + public void internalError(STMessage msg) { + reportError(msg); + } + + private void reportError(STMessage msg) { + getCodeGenerator().tool.errMgr.toolError(ErrorType.STRING_TEMPLATE_WARNING, msg.cause, msg.toString()); + } + }); + + return result; + } + + protected static class RustStringRenderer extends StringRenderer { + + @Override + public String toString(Object o, String formatString, Locale locale) { + if ("java-escape".equals(formatString)) { + // 5C is the hex code for the \ itself + return ((String) o).replace("\\u", "\\u{005C}u"); + } + if ("low".equals(formatString)) { + return ((String) o).toLowerCase(locale); + } + + return super.toString(o, formatString, locale); + } + + } + + @Override + public String processActionText(String text) { + // in rust `'` is not escapable so we don't care about inside string + return text.replaceAll("\\\\'", "'"); + } + + @Override + public boolean wantsBaseListener() { + return false; + } + + @Override + public boolean wantsBaseVisitor() { + return false; + } + + @Override + protected void appendUnicodeEscapedCodePoint(int codePoint, StringBuilder sb) { + // C99 and Python share the same escaping style. + UnicodeEscapes.appendSwiftStyleEscapedCodePoint(codePoint, sb); + } +}