From b35c7630ab7240304e67a43734700cf359acde0b Mon Sep 17 00:00:00 2001 From: Antony Blakey Date: Thu, 1 Feb 2024 21:30:26 +1100 Subject: [PATCH] Add Tree Query Language and Engine (#753) This pull request adds the CST Tree Query Language. It is a long way from being optimised, and in particular it should not need to allocate memory whilst backtracking, which it currently does. It is built with understandability as the top priority. This also includes some other changes that are not quite drive-by e.g. I rationalised the runtime crate structure and file naming to make more sense given I was adding a structural sub-module. --- .changeset/curvy-donkeys-shout.md | 5 + Cargo.lock | 66 +-- Cargo.toml | 6 +- .../parser/generator/src/rust_generator.rs | 57 ++- .../generator/src/typescript_generator.rs | 2 +- crates/codegen/parser/runtime/Cargo.toml | 1 + crates/codegen/parser/runtime/src/kinds.rs | 17 +- crates/codegen/parser/runtime/src/lexer.rs | 2 +- crates/codegen/parser/runtime/src/lib.rs | 5 +- .../mod.rs.jinja2 => mod_for_destination.rs} | 5 +- .../ast_selectors.rs} | 0 .../napi_cst.rs => napi_interface/cst.rs} | 8 +- .../cursor.rs} | 6 +- .../src/{napi => napi_interface}/mod.rs | 12 +- .../parse_error.rs} | 4 +- .../parse_output.rs} | 8 +- .../templates/ast_selectors.rs.jinja2 | 4 +- .../templates/ast_types.ts.jinja2 | 0 .../text_index.rs} | 2 +- .../choice_helper.rs | 4 +- .../{support => parser_support}/context.rs | 0 .../src/{support => parser_support}/mod.rs | 0 .../optional_helper.rs | 2 +- .../parser_function.rs | 4 +- .../parser_result.rs | 0 .../precedence_helper.rs | 6 +- .../{support => parser_support}/recovery.rs | 6 +- .../repetition_helper.rs | 4 +- .../scanner_macros.rs | 0 .../separated_helper.rs | 6 +- .../sequence_helper.rs | 2 +- .../parser/runtime/src/query/engine.rs | 478 +++++++++++++++++ .../parser/runtime/src/query/engine_tests.rs | 253 +++++++++ .../codegen/parser/runtime/src/query/mod.rs | 9 + .../runtime/src/query/mod_for_destination.rs | 
3 + .../codegen/parser/runtime/src/query/model.rs | 172 +++++++ .../parser/runtime/src/query/parser.rs | 269 ++++++++++ .../parser/runtime/src/query/parser_tests.rs | 52 ++ .../runtime/src/templates/language.rs.jinja2 | 4 +- crates/codegen/parser/runtime/src/visitor.rs | 1 - .../solidity/outputs/cargo/crate/Cargo.toml | 1 + .../cargo/crate/src/generated/language.rs | 4 +- .../cargo/crate/src/generated/lexer.rs | 2 +- .../outputs/cargo/crate/src/generated/mod.rs | 5 +- .../ast_selectors.rs} | 4 +- .../napi_cst.rs => napi_interface/cst.rs} | 8 +- .../cursor.rs} | 6 +- .../generated/{napi => napi_interface}/mod.rs | 12 +- .../parse_error.rs} | 4 +- .../parse_output.rs} | 8 +- .../text_index.rs} | 2 +- .../choice_helper.rs | 4 +- .../{support => parser_support}/context.rs | 0 .../{support => parser_support}/mod.rs | 0 .../optional_helper.rs | 2 +- .../parser_function.rs | 4 +- .../parser_result.rs | 0 .../precedence_helper.rs | 6 +- .../{support => parser_support}/recovery.rs | 6 +- .../repetition_helper.rs | 4 +- .../scanner_macros.rs | 0 .../separated_helper.rs | 6 +- .../sequence_helper.rs | 2 +- .../cargo/crate/src/generated/query/engine.rs | 480 ++++++++++++++++++ .../cargo/crate/src/generated/query/mod.rs | 5 + .../cargo/crate/src/generated/query/model.rs | 174 +++++++ .../cargo/crate/src/generated/query/parser.rs | 271 ++++++++++ .../solidity/outputs/cargo/crate/src/main.rs | 2 +- crates/solidity/outputs/npm/crate/Cargo.toml | 1 + 69 files changed, 2357 insertions(+), 151 deletions(-) create mode 100644 .changeset/curvy-donkeys-shout.md rename crates/codegen/parser/runtime/src/{templates/mod.rs.jinja2 => mod_for_destination.rs} (76%) rename crates/codegen/parser/runtime/src/{napi/napi_ast_selectors.rs => napi_interface/ast_selectors.rs} (100%) rename crates/codegen/parser/runtime/src/{napi/napi_cst.rs => napi_interface/cst.rs} (94%) rename crates/codegen/parser/runtime/src/{napi/napi_cursor.rs => napi_interface/cursor.rs} (97%) rename 
crates/codegen/parser/runtime/src/{napi => napi_interface}/mod.rs (78%) rename crates/codegen/parser/runtime/src/{napi/napi_parse_error.rs => napi_interface/parse_error.rs} (89%) rename crates/codegen/parser/runtime/src/{napi/napi_parse_output.rs => napi_interface/parse_output.rs} (80%) rename crates/codegen/parser/runtime/src/{napi => napi_interface}/templates/ast_selectors.rs.jinja2 (98%) rename crates/codegen/parser/runtime/src/{napi => napi_interface}/templates/ast_types.ts.jinja2 (100%) rename crates/codegen/parser/runtime/src/{napi/napi_text_index.rs => napi_interface/text_index.rs} (94%) rename crates/codegen/parser/runtime/src/{support => parser_support}/choice_helper.rs (98%) rename crates/codegen/parser/runtime/src/{support => parser_support}/context.rs (100%) rename crates/codegen/parser/runtime/src/{support => parser_support}/mod.rs (100%) rename crates/codegen/parser/runtime/src/{support => parser_support}/optional_helper.rs (92%) rename crates/codegen/parser/runtime/src/{support => parser_support}/parser_function.rs (97%) rename crates/codegen/parser/runtime/src/{support => parser_support}/parser_result.rs (100%) rename crates/codegen/parser/runtime/src/{support => parser_support}/precedence_helper.rs (97%) rename crates/codegen/parser/runtime/src/{support => parser_support}/recovery.rs (97%) rename crates/codegen/parser/runtime/src/{support => parser_support}/repetition_helper.rs (96%) rename crates/codegen/parser/runtime/src/{support => parser_support}/scanner_macros.rs (100%) rename crates/codegen/parser/runtime/src/{support => parser_support}/separated_helper.rs (96%) rename crates/codegen/parser/runtime/src/{support => parser_support}/sequence_helper.rs (99%) create mode 100644 crates/codegen/parser/runtime/src/query/engine.rs create mode 100644 crates/codegen/parser/runtime/src/query/engine_tests.rs create mode 100644 crates/codegen/parser/runtime/src/query/mod.rs create mode 100644 crates/codegen/parser/runtime/src/query/mod_for_destination.rs 
create mode 100644 crates/codegen/parser/runtime/src/query/model.rs create mode 100644 crates/codegen/parser/runtime/src/query/parser.rs create mode 100644 crates/codegen/parser/runtime/src/query/parser_tests.rs delete mode 100644 crates/codegen/parser/runtime/src/visitor.rs rename crates/solidity/outputs/cargo/crate/src/generated/{napi/napi_ast_selectors.rs => napi_interface/ast_selectors.rs} (99%) rename crates/solidity/outputs/cargo/crate/src/generated/{napi/napi_cst.rs => napi_interface/cst.rs} (94%) rename crates/solidity/outputs/cargo/crate/src/generated/{napi/napi_cursor.rs => napi_interface/cursor.rs} (97%) rename crates/solidity/outputs/cargo/crate/src/generated/{napi => napi_interface}/mod.rs (81%) rename crates/solidity/outputs/cargo/crate/src/generated/{napi/napi_parse_error.rs => napi_interface/parse_error.rs} (90%) rename crates/solidity/outputs/cargo/crate/src/generated/{napi/napi_parse_output.rs => napi_interface/parse_output.rs} (81%) rename crates/solidity/outputs/cargo/crate/src/generated/{napi/napi_text_index.rs => napi_interface/text_index.rs} (95%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/choice_helper.rs (98%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/context.rs (100%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/mod.rs (100%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/optional_helper.rs (93%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/parser_function.rs (97%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/parser_result.rs (100%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/precedence_helper.rs (97%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/recovery.rs (97%) rename 
crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/repetition_helper.rs (96%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/scanner_macros.rs (100%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/separated_helper.rs (96%) rename crates/solidity/outputs/cargo/crate/src/generated/{support => parser_support}/sequence_helper.rs (99%) create mode 100644 crates/solidity/outputs/cargo/crate/src/generated/query/engine.rs create mode 100644 crates/solidity/outputs/cargo/crate/src/generated/query/mod.rs create mode 100644 crates/solidity/outputs/cargo/crate/src/generated/query/model.rs create mode 100644 crates/solidity/outputs/cargo/crate/src/generated/query/parser.rs diff --git a/.changeset/curvy-donkeys-shout.md b/.changeset/curvy-donkeys-shout.md new file mode 100644 index 0000000000..b542ab15cc --- /dev/null +++ b/.changeset/curvy-donkeys-shout.md @@ -0,0 +1,5 @@ +--- +"@nomicfoundation/slang": minor +--- + +Add tree query implementation as `Query::parse` and `Cursor::query` diff --git a/Cargo.lock b/Cargo.lock index ffa8d93e40..e623982403 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -317,7 +317,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -360,7 +360,7 @@ dependencies = [ "semver", "serde", "strum_macros", - "syn 2.0.29", + "syn", "thiserror", ] @@ -371,7 +371,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -418,6 +418,7 @@ dependencies = [ "napi", "napi-build", "napi-derive", + "nom", "serde", "strum", "strum_macros", @@ -558,7 +559,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f34ba9a9bcb8645379e9de8cb3ecfcf4d1c85ba66d90deb3259206fa5aa193b" dependencies = [ "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -569,7 +570,7 @@ checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -1175,6 +1176,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -1227,7 +1234,7 @@ dependencies = [ "napi-derive-backend", "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -1242,7 +1249,7 @@ dependencies = [ "quote", "regex", "semver", - "syn 2.0.29", + "syn", ] [[package]] @@ -1272,6 +1279,16 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.16" @@ -1335,7 +1352,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -1410,7 +1427,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -1769,7 +1786,7 @@ checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -1860,6 +1877,7 @@ dependencies = [ "clap", "codegen_parser_generator", "infra_utils", + "nom", "semver", "serde", "serde_json", @@ -1929,6 +1947,7 @@ dependencies = [ "napi", "napi-build", "napi-derive", + "nom", "semver", "serde", "slang_solidity", @@ -2015,21 +2034,21 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strum" -version = "0.24.1" +version = "0.25.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" dependencies = [ "heck", "proc-macro2", "quote", "rustversion", - "syn 1.0.109", + "syn", ] [[package]] @@ -2042,17 +2061,6 @@ dependencies = [ "is_ci", ] -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.29" @@ -2157,7 +2165,7 @@ checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.29", + "syn", ] [[package]] @@ -2483,7 +2491,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.29", + "syn", "wasm-bindgen-shared", ] @@ -2517,7 +2525,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.29", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 4e302e400f..c47c33cc43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -76,6 +76,7 @@ markdown = { version = "0.3.0" } napi = { version = "2.14.2", features = ["compat-mode", "napi8", "serde-json"] } napi-build = { version = "2.1.0" } napi-derive = { version = "2.14.6" } +nom = { version = "7.1.3" } once_cell = { version = "1.19.0" } owo-colors = { version = "3.5.0", features = ["supports-colors"] } proc-macro2 = { version = "1.0.53" } @@ -87,8 +88,9 @@ semver = 
{ version = "1.0.17", features = ["serde"] } serde = { version = "1.0.158", features = ["derive", "rc"] } serde_json = { version = "1.0.94", features = ["preserve_order"] } similar-asserts = { version = "1.4.2" } -strum = { version = "0.24.0" } -strum_macros = { version = "0.24.0" } +stack-graphs = { version = "0.12.0" } +strum = { version = "0.25.0" } +strum_macros = { version = "0.25.3" } syn = { version = "2.0.29", features = [ "fold", "full", diff --git a/crates/codegen/parser/generator/src/rust_generator.rs b/crates/codegen/parser/generator/src/rust_generator.rs index adbcdbf27a..5eb9d5168d 100644 --- a/crates/codegen/parser/generator/src/rust_generator.rs +++ b/crates/codegen/parser/generator/src/rust_generator.rs @@ -80,8 +80,8 @@ impl RustGenerator { Context { ast_model: AstModel::create(language), }, - runtime_dir.join("napi/templates/ast_selectors.rs.jinja2"), - output_dir.join("napi/napi_ast_selectors.rs"), + runtime_dir.join("napi_interface/templates/ast_selectors.rs.jinja2"), + output_dir.join("napi_interface/ast_selectors.rs"), )?; } @@ -115,13 +115,13 @@ impl RustGenerator { )?; } - { - #[derive(Serialize)] - struct Context {} - codegen.render( - Context {}, - runtime_dir.join("templates/mod.rs.jinja2"), - output_dir.join("mod.rs"), + for (src_file, destination_file) in &[ + ("query/mod_for_destination.rs", "query/mod.rs"), + ("mod_for_destination.rs", "mod.rs"), + ] { + codegen.copy_file( + runtime_dir.join(src_file), + output_dir.join(destination_file), )?; } @@ -131,25 +131,28 @@ impl RustGenerator { "lexer.rs", "parse_error.rs", "parse_output.rs", + "query/engine.rs", + "query/model.rs", + "query/parser.rs", "text_index.rs", - "napi/napi_cst.rs", - "napi/napi_cursor.rs", - "napi/napi_parse_error.rs", - "napi/napi_parse_output.rs", - "napi/napi_text_index.rs", - "napi/mod.rs", - "support/mod.rs", - "support/context.rs", - "support/parser_function.rs", - "support/optional_helper.rs", - "support/sequence_helper.rs", - "support/repetition_helper.rs", 
- "support/choice_helper.rs", - "support/precedence_helper.rs", - "support/parser_result.rs", - "support/recovery.rs", - "support/separated_helper.rs", - "support/scanner_macros.rs", + "napi_interface/cst.rs", + "napi_interface/cursor.rs", + "napi_interface/parse_error.rs", + "napi_interface/parse_output.rs", + "napi_interface/text_index.rs", + "napi_interface/mod.rs", + "parser_support/mod.rs", + "parser_support/context.rs", + "parser_support/parser_function.rs", + "parser_support/optional_helper.rs", + "parser_support/sequence_helper.rs", + "parser_support/repetition_helper.rs", + "parser_support/choice_helper.rs", + "parser_support/precedence_helper.rs", + "parser_support/parser_result.rs", + "parser_support/recovery.rs", + "parser_support/separated_helper.rs", + "parser_support/scanner_macros.rs", ] { codegen.copy_file(runtime_dir.join(file), output_dir.join(file))?; } diff --git a/crates/codegen/parser/generator/src/typescript_generator.rs b/crates/codegen/parser/generator/src/typescript_generator.rs index 86cb6d5495..282ba1d84c 100644 --- a/crates/codegen/parser/generator/src/typescript_generator.rs +++ b/crates/codegen/parser/generator/src/typescript_generator.rs @@ -26,7 +26,7 @@ impl TypeScriptGenerator { Context { ast_model: AstModel::create(language), }, - runtime_dir.join("napi/templates/ast_types.ts.jinja2"), + runtime_dir.join("napi_interface/templates/ast_types.ts.jinja2"), output_dir.join("src/ast/generated/ast_types.ts"), )?; } diff --git a/crates/codegen/parser/runtime/Cargo.toml b/crates/codegen/parser/runtime/Cargo.toml index 8d74028ae3..0fb849002e 100644 --- a/crates/codegen/parser/runtime/Cargo.toml +++ b/crates/codegen/parser/runtime/Cargo.toml @@ -15,6 +15,7 @@ napi-build = { workspace = true, optional = true } ariadne = { workspace = true } napi = { workspace = true, optional = true } napi-derive = { workspace = true, optional = true } +nom = { workspace = true } serde = { workspace = true } strum = { workspace = true } strum_macros = { 
workspace = true } diff --git a/crates/codegen/parser/runtime/src/kinds.rs b/crates/codegen/parser/runtime/src/kinds.rs index 2b1ac308ca..2d25729b69 100644 --- a/crates/codegen/parser/runtime/src/kinds.rs +++ b/crates/codegen/parser/runtime/src/kinds.rs @@ -17,7 +17,11 @@ use napi_derive::napi; #[cfg_attr(not(feature = "slang_napi_interfaces"), derive(Clone, Copy))] pub enum TokenKind { SKIPPED, - // Expanded by the template engine + // Used for testing this crate, this is generated in the client code + Identifier, + Token1, + Token2, + Token3, } #[derive( @@ -37,7 +41,10 @@ pub enum TokenKind { pub enum RuleKind { LeadingTrivia, TrailingTrivia, - // Expanded by the template engine + // Used for testing this crate, this is generated in the client code + Rule1, + Rule2, + Rule3, } impl RuleKind { @@ -69,8 +76,10 @@ pub enum FieldName { Operand, LeftOperand, RightOperand, - // Generated - XXX, + // Used for testing this crate, this is generated in the client code + Name1, + Name2, + Name3, } /// The lexical context of the scanner. diff --git a/crates/codegen/parser/runtime/src/lexer.rs b/crates/codegen/parser/runtime/src/lexer.rs index 5923dfdf7b..cfc6ebbead 100644 --- a/crates/codegen/parser/runtime/src/lexer.rs +++ b/crates/codegen/parser/runtime/src/lexer.rs @@ -1,6 +1,6 @@ use crate::cst::{self, NamedNode}; use crate::kinds::{IsLexicalContext, TokenKind}; -use crate::support::{ParserContext, ParserResult}; +use crate::parser_support::{ParserContext, ParserResult}; /// Whether a keyword has been scanned and if so, whether it is reserved (unusable as an identifier) /// or not. 
diff --git a/crates/codegen/parser/runtime/src/lib.rs b/crates/codegen/parser/runtime/src/lib.rs index 8ed2f8ed55..cc8b151369 100644 --- a/crates/codegen/parser/runtime/src/lib.rs +++ b/crates/codegen/parser/runtime/src/lib.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] #[macro_use] -mod support; +mod parser_support; pub mod cst; pub mod cursor; @@ -9,7 +9,8 @@ pub mod kinds; pub(crate) mod lexer; pub mod parse_error; pub mod parse_output; +pub mod query; pub mod text_index; #[cfg(feature = "slang_napi_interfaces")] -pub mod napi; +pub mod napi_interface; diff --git a/crates/codegen/parser/runtime/src/templates/mod.rs.jinja2 b/crates/codegen/parser/runtime/src/mod_for_destination.rs similarity index 76% rename from crates/codegen/parser/runtime/src/templates/mod.rs.jinja2 rename to crates/codegen/parser/runtime/src/mod_for_destination.rs index 4bb91487cf..a2b258a41c 100644 --- a/crates/codegen/parser/runtime/src/templates/mod.rs.jinja2 +++ b/crates/codegen/parser/runtime/src/mod_for_destination.rs @@ -1,5 +1,5 @@ #[macro_use] -pub mod support; +pub mod parser_support; pub mod cst; pub mod cursor; @@ -8,7 +8,8 @@ pub mod language; pub(crate) mod lexer; pub mod parse_error; pub mod parse_output; +pub mod query; pub mod text_index; #[cfg(feature = "slang_napi_interfaces")] -pub mod napi; +pub mod napi_interface; diff --git a/crates/codegen/parser/runtime/src/napi/napi_ast_selectors.rs b/crates/codegen/parser/runtime/src/napi_interface/ast_selectors.rs similarity index 100% rename from crates/codegen/parser/runtime/src/napi/napi_ast_selectors.rs rename to crates/codegen/parser/runtime/src/napi_interface/ast_selectors.rs diff --git a/crates/codegen/parser/runtime/src/napi/napi_cst.rs b/crates/codegen/parser/runtime/src/napi_interface/cst.rs similarity index 94% rename from crates/codegen/parser/runtime/src/napi/napi_cst.rs rename to crates/codegen/parser/runtime/src/napi_interface/cst.rs index 018f6ff9b1..8cc4c142a0 100644 --- 
a/crates/codegen/parser/runtime/src/napi/napi_cst.rs +++ b/crates/codegen/parser/runtime/src/napi_interface/cst.rs @@ -4,9 +4,11 @@ use napi::bindgen_prelude::{Env, ToNapiValue}; use napi::{JsObject, NapiValue}; use napi_derive::napi; -use crate::napi::napi_cursor::Cursor; -use crate::napi::napi_text_index::TextIndex; -use crate::napi::{RuleKind, RustNode, RustRuleNode, RustTextIndex, RustTokenNode, TokenKind}; +use crate::napi_interface::cursor::Cursor; +use crate::napi_interface::text_index::TextIndex; +use crate::napi_interface::{ + RuleKind, RustNode, RustRuleNode, RustTextIndex, RustTokenNode, TokenKind, +}; #[napi(namespace = "cst")] pub enum NodeType { diff --git a/crates/codegen/parser/runtime/src/napi/napi_cursor.rs b/crates/codegen/parser/runtime/src/napi_interface/cursor.rs similarity index 97% rename from crates/codegen/parser/runtime/src/napi/napi_cursor.rs rename to crates/codegen/parser/runtime/src/napi_interface/cursor.rs index cb71f0632e..27b7d235de 100644 --- a/crates/codegen/parser/runtime/src/napi/napi_cursor.rs +++ b/crates/codegen/parser/runtime/src/napi_interface/cursor.rs @@ -3,13 +3,13 @@ // The functions are meant to be definitions for export, so they're not really used #![allow(clippy::return_self_not_must_use)] +use cst::ToJS; use napi::bindgen_prelude::Env; use napi::JsObject; -use napi_cst::ToJS; use napi_derive::napi; -use napi_text_index::{TextIndex, TextRange}; +use text_index::{TextIndex, TextRange}; -use crate::napi::{napi_cst, napi_text_index, FieldName, RuleKind, RustCursor, TokenKind}; +use crate::napi_interface::{cst, text_index, FieldName, RuleKind, RustCursor, TokenKind}; #[napi(namespace = "cursor")] pub struct Cursor(Box); diff --git a/crates/codegen/parser/runtime/src/napi/mod.rs b/crates/codegen/parser/runtime/src/napi_interface/mod.rs similarity index 78% rename from crates/codegen/parser/runtime/src/napi/mod.rs rename to crates/codegen/parser/runtime/src/napi_interface/mod.rs index 4602518d54..5b70916b68 100644 --- 
a/crates/codegen/parser/runtime/src/napi/mod.rs +++ b/crates/codegen/parser/runtime/src/napi_interface/mod.rs @@ -1,9 +1,9 @@ -pub mod napi_ast_selectors; -pub mod napi_cst; -pub mod napi_cursor; -pub mod napi_parse_error; -pub mod napi_parse_output; -pub mod napi_text_index; +pub mod ast_selectors; +pub mod cst; +pub mod cursor; +pub mod parse_error; +pub mod parse_output; +pub mod text_index; type RustCursor = crate::cursor::Cursor; type RustNamedNode = crate::cst::NamedNode; diff --git a/crates/codegen/parser/runtime/src/napi/napi_parse_error.rs b/crates/codegen/parser/runtime/src/napi_interface/parse_error.rs similarity index 89% rename from crates/codegen/parser/runtime/src/napi/napi_parse_error.rs rename to crates/codegen/parser/runtime/src/napi_interface/parse_error.rs index 00aa2e2d0f..8a2a301fe2 100644 --- a/crates/codegen/parser/runtime/src/napi/napi_parse_error.rs +++ b/crates/codegen/parser/runtime/src/napi_interface/parse_error.rs @@ -2,9 +2,9 @@ #![allow(clippy::needless_pass_by_value)] use napi_derive::napi; -use napi_text_index::TextRange; +use text_index::TextRange; -use crate::napi::{napi_text_index, RustParseError}; +use crate::napi_interface::{text_index, RustParseError}; #[napi(namespace = "parse_error")] #[derive(PartialEq, Clone)] diff --git a/crates/codegen/parser/runtime/src/napi/napi_parse_output.rs b/crates/codegen/parser/runtime/src/napi_interface/parse_output.rs similarity index 80% rename from crates/codegen/parser/runtime/src/napi/napi_parse_output.rs rename to crates/codegen/parser/runtime/src/napi_interface/parse_output.rs index d0ef152b5e..b380b1cb46 100644 --- a/crates/codegen/parser/runtime/src/napi/napi_parse_output.rs +++ b/crates/codegen/parser/runtime/src/napi_interface/parse_output.rs @@ -1,8 +1,8 @@ +use cst::ToJS; use napi::bindgen_prelude::Env; -use napi_cst::ToJS; use napi_derive::napi; -use crate::napi::{napi_cst, napi_cursor, napi_parse_error, RustParseOutput}; +use crate::napi_interface::{cst, cursor, parse_error, 
RustParseOutput}; #[napi(namespace = "parse_output")] pub struct ParseOutput(RustParseOutput); @@ -21,7 +21,7 @@ impl ParseOutput { } #[napi(ts_return_type = "Array", catch_unwind)] - pub fn errors(&self) -> Vec { + pub fn errors(&self) -> Vec { self.0.errors().iter().map(|x| x.clone().into()).collect() } @@ -32,7 +32,7 @@ impl ParseOutput { /// Creates a cursor that starts at the root of the parse tree. #[napi(ts_return_type = "cursor.Cursor", catch_unwind)] - pub fn create_tree_cursor(&self) -> napi_cursor::Cursor { + pub fn create_tree_cursor(&self) -> cursor::Cursor { self.0.create_tree_cursor().into() } } diff --git a/crates/codegen/parser/runtime/src/napi/templates/ast_selectors.rs.jinja2 b/crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 similarity index 98% rename from crates/codegen/parser/runtime/src/napi/templates/ast_selectors.rs.jinja2 rename to crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 index ae96584cc9..9903e787b5 100644 --- a/crates/codegen/parser/runtime/src/napi/templates/ast_selectors.rs.jinja2 +++ b/crates/codegen/parser/runtime/src/napi_interface/templates/ast_selectors.rs.jinja2 @@ -5,8 +5,8 @@ use std::rc::Rc; use napi::{Env, JsObject}; use napi_derive::napi; -use crate::napi::napi_cst::{RuleNode, ToJS}; -use crate::napi::{RuleKind, RustNamedNode, RustNode, RustRuleNode, TokenKind}; +use crate::napi_interface::cst::{RuleNode, ToJS}; +use crate::napi_interface::{RuleKind, RustNamedNode, RustNode, RustRuleNode, TokenKind}; // // Sequences: diff --git a/crates/codegen/parser/runtime/src/napi/templates/ast_types.ts.jinja2 b/crates/codegen/parser/runtime/src/napi_interface/templates/ast_types.ts.jinja2 similarity index 100% rename from crates/codegen/parser/runtime/src/napi/templates/ast_types.ts.jinja2 rename to crates/codegen/parser/runtime/src/napi_interface/templates/ast_types.ts.jinja2 diff --git a/crates/codegen/parser/runtime/src/napi/napi_text_index.rs 
b/crates/codegen/parser/runtime/src/napi_interface/text_index.rs similarity index 94% rename from crates/codegen/parser/runtime/src/napi/napi_text_index.rs rename to crates/codegen/parser/runtime/src/napi_interface/text_index.rs index 78710c3369..5920bf093b 100644 --- a/crates/codegen/parser/runtime/src/napi/napi_text_index.rs +++ b/crates/codegen/parser/runtime/src/napi_interface/text_index.rs @@ -1,6 +1,6 @@ use napi_derive::napi; -use crate::napi::{RustTextIndex, RustTextRange}; +use crate::napi_interface::{RustTextIndex, RustTextRange}; #[napi(object, namespace = "text_index")] #[derive(Copy, Clone)] diff --git a/crates/codegen/parser/runtime/src/support/choice_helper.rs b/crates/codegen/parser/runtime/src/parser_support/choice_helper.rs similarity index 98% rename from crates/codegen/parser/runtime/src/support/choice_helper.rs rename to crates/codegen/parser/runtime/src/parser_support/choice_helper.rs index 3a50bb03b1..2aaca22274 100644 --- a/crates/codegen/parser/runtime/src/support/choice_helper.rs +++ b/crates/codegen/parser/runtime/src/parser_support/choice_helper.rs @@ -4,8 +4,8 @@ use std::ops::ControlFlow; use crate::cst; use crate::kinds::TokenKind; use crate::parse_error::ParseError; -use crate::support::context::{Marker, ParserContext}; -use crate::support::ParserResult; +use crate::parser_support::context::{Marker, ParserContext}; +use crate::parser_support::ParserResult; use crate::text_index::TextIndex; /// Starting from a given position in the input, this helper will try to pick (and remember) a best match. 
Settles on diff --git a/crates/codegen/parser/runtime/src/support/context.rs b/crates/codegen/parser/runtime/src/parser_support/context.rs similarity index 100% rename from crates/codegen/parser/runtime/src/support/context.rs rename to crates/codegen/parser/runtime/src/parser_support/context.rs diff --git a/crates/codegen/parser/runtime/src/support/mod.rs b/crates/codegen/parser/runtime/src/parser_support/mod.rs similarity index 100% rename from crates/codegen/parser/runtime/src/support/mod.rs rename to crates/codegen/parser/runtime/src/parser_support/mod.rs diff --git a/crates/codegen/parser/runtime/src/support/optional_helper.rs b/crates/codegen/parser/runtime/src/parser_support/optional_helper.rs similarity index 92% rename from crates/codegen/parser/runtime/src/support/optional_helper.rs rename to crates/codegen/parser/runtime/src/parser_support/optional_helper.rs index 6d06d36e0b..94c8543f42 100644 --- a/crates/codegen/parser/runtime/src/support/optional_helper.rs +++ b/crates/codegen/parser/runtime/src/parser_support/optional_helper.rs @@ -1,4 +1,4 @@ -use crate::support::parser_result::ParserResult; +use crate::parser_support::parser_result::ParserResult; pub struct OptionalHelper; diff --git a/crates/codegen/parser/runtime/src/support/parser_function.rs b/crates/codegen/parser/runtime/src/parser_support/parser_function.rs similarity index 97% rename from crates/codegen/parser/runtime/src/support/parser_function.rs rename to crates/codegen/parser/runtime/src/parser_support/parser_function.rs index 3b80fa40e7..0c30bd73f2 100644 --- a/crates/codegen/parser/runtime/src/support/parser_function.rs +++ b/crates/codegen/parser/runtime/src/parser_support/parser_function.rs @@ -5,8 +5,8 @@ use crate::kinds::TokenKind; use crate::lexer::Lexer; use crate::parse_error::ParseError; use crate::parse_output::ParseOutput; -use crate::support::context::ParserContext; -use crate::support::parser_result::{IncompleteMatch, Match, ParserResult, SkippedUntil}; +use 
crate::parser_support::context::ParserContext; +use crate::parser_support::parser_result::{IncompleteMatch, Match, ParserResult, SkippedUntil}; use crate::text_index::TextIndex; pub trait ParserFunction diff --git a/crates/codegen/parser/runtime/src/support/parser_result.rs b/crates/codegen/parser/runtime/src/parser_support/parser_result.rs similarity index 100% rename from crates/codegen/parser/runtime/src/support/parser_result.rs rename to crates/codegen/parser/runtime/src/parser_support/parser_result.rs diff --git a/crates/codegen/parser/runtime/src/support/precedence_helper.rs b/crates/codegen/parser/runtime/src/parser_support/precedence_helper.rs similarity index 97% rename from crates/codegen/parser/runtime/src/support/precedence_helper.rs rename to crates/codegen/parser/runtime/src/parser_support/precedence_helper.rs index 340516f6d6..b65144e174 100644 --- a/crates/codegen/parser/runtime/src/support/precedence_helper.rs +++ b/crates/codegen/parser/runtime/src/parser_support/precedence_helper.rs @@ -1,7 +1,9 @@ use crate::cst::{self, NamedNode}; use crate::kinds::{FieldName, RuleKind}; -use crate::support::parser_result::PrattElement::{self, Binary, Expression, Postfix, Prefix}; -use crate::support::parser_result::{ParserResult, PrattOperatorMatch}; +use crate::parser_support::parser_result::PrattElement::{ + self, Binary, Expression, Postfix, Prefix, +}; +use crate::parser_support::parser_result::{ParserResult, PrattOperatorMatch}; pub struct PrecedenceHelper; diff --git a/crates/codegen/parser/runtime/src/support/recovery.rs b/crates/codegen/parser/runtime/src/parser_support/recovery.rs similarity index 97% rename from crates/codegen/parser/runtime/src/support/recovery.rs rename to crates/codegen/parser/runtime/src/parser_support/recovery.rs index 0df7a28b87..3c2cca5382 100644 --- a/crates/codegen/parser/runtime/src/support/recovery.rs +++ b/crates/codegen/parser/runtime/src/parser_support/recovery.rs @@ -2,9 +2,9 @@ use crate::cst; use 
crate::kinds::{IsLexicalContext, TokenKind}; use crate::lexer::{Lexer, ScannedToken}; use crate::parse_error::ParseError; -use crate::support::context::ParserContext; -use crate::support::parser_result::SkippedUntil; -use crate::support::ParserResult; +use crate::parser_support::context::ParserContext; +use crate::parser_support::parser_result::SkippedUntil; +use crate::parser_support::ParserResult; use crate::text_index::{TextRange, TextRangeExtensions as _}; /// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method. diff --git a/crates/codegen/parser/runtime/src/support/repetition_helper.rs b/crates/codegen/parser/runtime/src/parser_support/repetition_helper.rs similarity index 96% rename from crates/codegen/parser/runtime/src/support/repetition_helper.rs rename to crates/codegen/parser/runtime/src/parser_support/repetition_helper.rs index 2fcf23491d..5f1d58f80a 100644 --- a/crates/codegen/parser/runtime/src/support/repetition_helper.rs +++ b/crates/codegen/parser/runtime/src/parser_support/repetition_helper.rs @@ -1,5 +1,5 @@ -use crate::support::context::ParserContext; -use crate::support::parser_result::{IncompleteMatch, NoMatch, ParserResult}; +use crate::parser_support::context::ParserContext; +use crate::parser_support::parser_result::{IncompleteMatch, NoMatch, ParserResult}; pub struct RepetitionHelper; diff --git a/crates/codegen/parser/runtime/src/support/scanner_macros.rs b/crates/codegen/parser/runtime/src/parser_support/scanner_macros.rs similarity index 100% rename from crates/codegen/parser/runtime/src/support/scanner_macros.rs rename to crates/codegen/parser/runtime/src/parser_support/scanner_macros.rs diff --git a/crates/codegen/parser/runtime/src/support/separated_helper.rs b/crates/codegen/parser/runtime/src/parser_support/separated_helper.rs similarity index 96% rename from crates/codegen/parser/runtime/src/support/separated_helper.rs rename to 
crates/codegen/parser/runtime/src/parser_support/separated_helper.rs index ec586ffdd4..a9623a96c3 100644 --- a/crates/codegen/parser/runtime/src/support/separated_helper.rs +++ b/crates/codegen/parser/runtime/src/parser_support/separated_helper.rs @@ -2,9 +2,9 @@ use crate::cst::{self, NamedNode}; use crate::kinds::{FieldName, IsLexicalContext, TokenKind}; use crate::lexer::Lexer; use crate::parse_error::ParseError; -use crate::support::parser_result::{ParserResult, SkippedUntil}; -use crate::support::recovery::skip_until_with_nested_delims; -use crate::support::ParserContext; +use crate::parser_support::parser_result::{ParserResult, SkippedUntil}; +use crate::parser_support::recovery::skip_until_with_nested_delims; +use crate::parser_support::ParserContext; use crate::text_index::TextRangeExtensions; pub struct SeparatedHelper; diff --git a/crates/codegen/parser/runtime/src/support/sequence_helper.rs b/crates/codegen/parser/runtime/src/parser_support/sequence_helper.rs similarity index 99% rename from crates/codegen/parser/runtime/src/support/sequence_helper.rs rename to crates/codegen/parser/runtime/src/parser_support/sequence_helper.rs index 5848ef9288..c58371b7d3 100644 --- a/crates/codegen/parser/runtime/src/support/sequence_helper.rs +++ b/crates/codegen/parser/runtime/src/parser_support/sequence_helper.rs @@ -2,7 +2,7 @@ use std::ops::ControlFlow; use crate::cst::{self, NamedNode}; use crate::kinds::{FieldName, TokenKind}; -use crate::support::parser_result::{Match, ParserResult, PrattElement, SkippedUntil}; +use crate::parser_support::parser_result::{Match, ParserResult, PrattElement, SkippedUntil}; /// Keeps accumulating parses sequentially until it hits an incomplete or no match. 
#[must_use] diff --git a/crates/codegen/parser/runtime/src/query/engine.rs b/crates/codegen/parser/runtime/src/query/engine.rs new file mode 100644 index 0000000000..4e5bca3d48 --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/engine.rs @@ -0,0 +1,478 @@ +use std::collections::HashMap; +use std::rc::Rc; + +// This crate is copied to another crate, so all imports should be relative +use super::super::cst; +use super::super::cursor::Cursor; +use super::model::{ + AlternativesMatcher, BindingMatcher, Kind, Matcher, NodeMatcher, NodeSelector, + OneOrMoreMatcher, OptionalMatcher, Query, SequenceMatcher, +}; + +impl Cursor { + pub fn query(self, queries: Vec) -> QueryResultIterator { + QueryResultIterator::new(self, queries) + } + + fn irrevocably_go_to_next_sibling(&mut self) -> bool { + if self.is_completed() { + false + } else { + if !self.go_to_next_sibling() { + self.complete(); + } + true + } + } + + fn matches_node_selector(&self, node_selector: &NodeSelector) -> bool { + match self.node() { + cst::Node::Rule(rule) => match node_selector { + NodeSelector::Anonymous => true, + NodeSelector::Kind { kind } => Kind::Rule(rule.kind) == *kind, + NodeSelector::Text { .. } => false, + NodeSelector::FieldName { field_name } => Some(*field_name) == self.node_name(), + NodeSelector::FieldNameAndKind { field_name, kind } => { + Some(*field_name) == self.node_name() && Kind::Rule(rule.kind) == *kind + } + NodeSelector::FieldNameAndText { .. 
} => false, + }, + + cst::Node::Token(token) => match node_selector { + NodeSelector::Anonymous => true, + NodeSelector::Kind { kind } => Kind::Token(token.kind) == *kind, + NodeSelector::Text { text } => token.text == *text, + NodeSelector::FieldName { field_name } => Some(*field_name) == self.node_name(), + NodeSelector::FieldNameAndKind { field_name, kind } => { + Some(*field_name) == self.node_name() && Kind::Token(token.kind) == *kind + } + NodeSelector::FieldNameAndText { field_name, text } => { + Some(*field_name) == self.node_name() && token.text == *text + } + }, + } + } +} + +impl Matcher { + // This allows for queries to pre-flight against a cursor without allocating + fn can_match(&self, cursor: &Cursor) -> bool { + match self { + Self::Binding(matcher) => matcher.child.can_match(cursor), + Self::Node(matcher) => cursor.matches_node_selector(&matcher.node_selector), + Self::Alternatives(matcher) => matcher.children.iter().any(|c| c.can_match(cursor)), + Self::Sequence(matcher) => matcher.children[0].can_match(cursor), + Self::OneOrMore(matcher) => matcher.child.can_match(cursor), + Self::Optional(_) => true, + Self::Ellipsis => true, + } + } + + fn create_combinator(&self, cursor: Cursor) -> CombinatorRef { + match self { + Self::Binding(matcher) => Box::new(BindingCombinator::new(matcher.clone(), cursor)), + Self::Node(matcher) => Box::new(NodeCombinator::new(matcher.clone(), cursor)), + Self::Sequence(matcher) => Box::new(SequenceCombinator::new(matcher.clone(), cursor)), + Self::Alternatives(matcher) => { + Box::new(AlternativesCombinator::new(matcher.clone(), cursor)) + } + Self::Optional(matcher) => Box::new(OptionalCombinator::new(matcher.clone(), cursor)), + Self::OneOrMore(matcher) => Box::new(OneOrMoreCombinator::new(matcher.clone(), cursor)), + Self::Ellipsis => Box::new(EllipsisCombinator::new(cursor)), + } + } +} + +pub struct QueryResult { + pub query_number: usize, + pub bindings: HashMap>, +} + +pub struct QueryResultIterator { + cursor: 
Cursor, + queries: Vec, + query_number: usize, + combinator: Option, +} + +impl QueryResultIterator { + fn new(cursor: Cursor, queries: Vec) -> Self { + Self { + cursor, + queries, + query_number: 0, + combinator: None, + } + } + + fn advance_to_next_possible_matching_query(&mut self) { + while !self.cursor.is_completed() { + while self.query_number < self.queries.len() { + let matcher = &self.queries[self.query_number].0; + if matcher.can_match(&self.cursor) { + self.combinator = Some(matcher.create_combinator(self.cursor.clone())); + return; + }; + self.query_number += 1; + } + self.cursor.go_to_next(); + self.query_number = 0; + } + } +} + +impl Iterator for QueryResultIterator { + type Item = QueryResult; + + fn next(&mut self) -> Option { + while !self.cursor.is_completed() { + if let Some(combinator) = self.combinator.as_mut() { + if combinator.next().is_some() { + let mut bindings = HashMap::new(); + combinator.accumulate_bindings(&mut bindings); + return Some(QueryResult { + query_number: self.query_number, + bindings, + }); + } + self.query_number += 1; + } + + self.advance_to_next_possible_matching_query(); + } + + None + } +} + +trait Combinator { + // None -> failed to match, you must backtrack. 
DO NOT call again + // Some(cursor) if cursor.is_complete -> matched, end of input + // Some(cursor) if !cursor.is_complete -> matched, more input to go + fn next(&mut self) -> Option; + fn accumulate_bindings(&self, bindings: &mut HashMap>); +} +type CombinatorRef = Box; + +struct BindingCombinator { + matcher: Rc, + cursor: Cursor, + child: CombinatorRef, +} + +impl BindingCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + let child = matcher.child.create_combinator(cursor.clone()); + Self { + matcher, + cursor, + child, + } + } +} + +impl Combinator for BindingCombinator { + fn next(&mut self) -> Option { + self.child.next() + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + bindings + .entry(self.matcher.name.clone()) + .or_default() + .push(self.cursor.clone()); + } +} + +struct NodeCombinator { + matcher: Rc, + child: Option, + cursor: Cursor, + is_initialised: bool, +} + +impl NodeCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + child: None, + cursor, + is_initialised: false, + } + } +} + +impl Combinator for NodeCombinator { + fn next(&mut self) -> Option { + if self.cursor.is_completed() { + return None; + } + + if !self.is_initialised { + self.is_initialised = true; + + if !self + .cursor + .matches_node_selector(&self.matcher.node_selector) + { + return None; + } + + if let Some(child) = self.matcher.child.as_ref() { + let mut child_cursor = self.cursor.clone(); + if !child_cursor.go_to_first_child() { + return None; + } + + self.child = Some(child.create_combinator(child_cursor)); + } else { + let mut return_cursor = self.cursor.clone(); + return_cursor.irrevocably_go_to_next_sibling(); + return Some(return_cursor); + } + } + + if let Some(child) = self.child.as_mut() { + while let Some(cursor) = child.as_mut().next() { + if cursor.is_completed() { + let mut return_cursor = self.cursor.clone(); + return_cursor.irrevocably_go_to_next_sibling(); + return Some(return_cursor); + } + } + 
self.child = None; + } + + None + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + if let Some(child) = self.child.as_ref() { + child.accumulate_bindings(bindings); + } + } +} + +struct SequenceCombinator { + matcher: Rc, + children: Vec, + cursor: Cursor, + is_initialised: bool, +} + +impl SequenceCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + children: vec![], + cursor, + is_initialised: false, + } + } +} + +impl Combinator for SequenceCombinator { + fn next(&mut self) -> Option { + if !self.is_initialised { + self.is_initialised = true; + + let child_cursor = self.cursor.clone(); + let child = self.matcher.children[0].create_combinator(child_cursor); + self.children.push(child); + } + + while !self.children.is_empty() { + if let Some(child_cursor) = self.children.last_mut().unwrap().next() { + if self.children.len() == self.matcher.children.len() { + return Some(child_cursor); + } + + let child = + self.matcher.children[self.children.len()].create_combinator(child_cursor); + self.children.push(child); + } else { + self.children.pop(); + } + } + + None + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + for child in &self.children { + child.accumulate_bindings(bindings); + } + } +} + +struct AlternativesCombinator { + matcher: Rc, + next_child_number: usize, + child: Option, + cursor: Cursor, +} + +impl AlternativesCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + next_child_number: 0, + child: None, + cursor, + } + } +} + +impl Combinator for AlternativesCombinator { + fn next(&mut self) -> Option { + loop { + if self.child.is_none() { + match self.matcher.children.get(self.next_child_number) { + Some(child) => { + let child = child.create_combinator(self.cursor.clone()); + self.child = Some(child); + self.next_child_number += 1; + } + None => return None, + } + } + + match self.child.as_mut().unwrap().next() { + Some(cursor) => return Some(cursor), + None => 
self.child = None, + } + } + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + self.child.as_ref().unwrap().accumulate_bindings(bindings); + } +} + +struct OptionalCombinator { + matcher: Rc, + child: Option, + cursor: Cursor, + have_nonempty_match: bool, +} + +impl OptionalCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + child: None, + cursor, + have_nonempty_match: false, + } + } +} + +impl Combinator for OptionalCombinator { + fn next(&mut self) -> Option { + if let Some(child) = self.child.as_mut() { + match child.next() { + result @ Some(_) => { + self.have_nonempty_match = true; + result + } + None => { + self.child = None; + None + } + } + } else { + let child_cursor = self.cursor.clone(); + let child = self.matcher.child.create_combinator(child_cursor); + self.child = Some(child); + Some(self.cursor.clone()) + } + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + if self.have_nonempty_match { + if let Some(child) = self.child.as_ref() { + child.accumulate_bindings(bindings); + } + } + } +} + +struct OneOrMoreCombinator { + matcher: Rc, + children: Vec, + cursor_for_next_repetition: Option, +} + +impl OneOrMoreCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + let cursor_for_next_repetition = Some(cursor); + Self { + matcher, + children: vec![], + cursor_for_next_repetition, + } + } +} + +impl Combinator for OneOrMoreCombinator { + fn next(&mut self) -> Option { + loop { + if let Some(cursor_for_next_repetition) = self.cursor_for_next_repetition.take() { + let next_child = self + .matcher + .child + .create_combinator(cursor_for_next_repetition); + self.children.push(next_child); + } else { + let tail = self.children.last_mut().unwrap(); + if let Some(cursor) = tail.next() { + if !cursor.is_completed() { + self.cursor_for_next_repetition = Some(cursor.clone()); + } + return Some(cursor); + } + self.children.pop(); + if self.children.is_empty() { + return None; + } + } + } + } + + fn 
accumulate_bindings(&self, bindings: &mut HashMap>) { + for child in &self.children { + child.accumulate_bindings(bindings); + } + } +} + +struct EllipsisCombinator { + cursor: Cursor, + has_returned_initial_empty_value: bool, +} + +impl EllipsisCombinator { + fn new(cursor: Cursor) -> Self { + Self { + cursor, + has_returned_initial_empty_value: false, + } + } +} + +impl Combinator for EllipsisCombinator { + fn next(&mut self) -> Option { + if !self.has_returned_initial_empty_value { + self.has_returned_initial_empty_value = true; + return Some(self.cursor.clone()); + } + + if self.cursor.irrevocably_go_to_next_sibling() { + return Some(self.cursor.clone()); + } + + None + } + + fn accumulate_bindings(&self, _bindings: &mut HashMap>) {} +} diff --git a/crates/codegen/parser/runtime/src/query/engine_tests.rs b/crates/codegen/parser/runtime/src/query/engine_tests.rs new file mode 100644 index 0000000000..114e9f3c2b --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/engine_tests.rs @@ -0,0 +1,253 @@ +use std::collections::{BTreeMap, HashMap}; + +// This crate is copied to another crate, so all imports should be relative +use super::super::cst::{NamedNode, Node}; +use super::super::cursor::Cursor; +use super::super::kinds::{FieldName, RuleKind, TokenKind}; +use super::super::text_index::TextIndex; +use super::engine::QueryResult; +use super::model::Query; + +fn token(name: Option, kind: TokenKind, text: &str) -> NamedNode { + NamedNode { + name, + node: Node::token(kind, text.to_string()), + } +} + +fn rule( + name: Option, + kind: RuleKind, + children: [NamedNode; N], +) -> NamedNode { + NamedNode { + name, + node: Node::rule(kind, children.into_iter().collect()), + } +} + +fn binding_cursors_to_strings( + bindings: HashMap>, +) -> BTreeMap> { + bindings + .into_iter() + .map(|(key, values)| { + ( + key, + values + .iter() + .map(|v| v.node().unparse()) + .collect::>(), + ) + }) + .collect::>() +} + +#[allow(unused_macro_rules)] +macro_rules! 
cst_tree { + ( @inner [ $($child:expr)* ]) => { [ $($child),* ] }; + + ( @inner [ $($child:expr)* ] $field_name:ident : $token_kind:ident $text:literal $(, $($rest:tt)*)? ) => { + cst_tree!(@inner [ $($child)* token(Some(FieldName::$field_name), TokenKind::$token_kind, $text) ] $($($rest)*)?) + }; + + ( @inner [ $($child:expr)* ] $token_kind:ident $text:literal $(, $($rest:tt)*)? ) => { + cst_tree!(@inner [ $($child)* token(None, TokenKind::$token_kind, $text) ] $($($rest)*)?) + }; + + ( @inner [ $($child:expr)* ] $field_name:ident : $rule_kind:ident [ $($children:tt)* ] $(, $($rest:tt)*)? ) => { + cst_tree!(@inner [ $($child)* rule(Some(FieldName::$field_name), RuleKind::$rule_kind, cst_tree!(@inner [] $($children)*)) ] $($($rest)*)?) + }; + + ( @inner [ $($child:expr)* ] $rule_kind:ident [ $($children:tt)* ] $(, $($rest:tt)*)? ) => { + cst_tree!(@inner [ $($child)* rule(None, RuleKind::$rule_kind, cst_tree!(@inner [] $($children)*)) ] $($($rest)*)?) + }; + + // Start with a rule + ( $field_name:ident : $rule_kind:ident [ $($children:tt)* ] ) => { + rule(Some(FieldName::$field_name), RuleKind::$rule_kind, cst_tree!(@inner [] $($children)*)) + }; + + ( $rule_kind:ident [ $($children:tt)* ] ) => { + rule(None, RuleKind::$rule_kind, cst_tree!(@inner [] $($children)*)) + }; + +} + +macro_rules! query_results { + ( $( { $( $key:ident : [ $($value:literal),* ] ),* } )* ) => { + vec![ $( { + #[allow(unused_mut)] + let mut bindings = BTreeMap::new(); + $( bindings.insert( stringify!($key).to_string(), vec![ $( $value.to_string() ),* ]); )* + bindings + } ),* ] + }; + +} + +fn run_query_test(tree: &NamedNode, query: &str, results: Vec>>) { + let cursor = tree.cursor_with_offset(TextIndex::ZERO); + let query = vec![Query::parse(query).unwrap()]; + let mut results = results.into_iter(); + for QueryResult { bindings, .. 
} in cursor.query(query) { + let bindings = binding_cursors_to_strings(bindings); + if let Some(expected_bindings) = results.next() { + assert_eq!(bindings, expected_bindings); + } else { + panic!("Unexpected query result: {bindings:?}"); + } + } + if let Some(expected_bindings) = results.next() { + panic!("Missing query result: {expected_bindings:?}"); + } +} + +fn common_test_tree() -> NamedNode { + cst_tree!( + Rule1 [ + Name1: Token1 "t1", + Token1 "t2", + Token1 "t3", + Rule2 [ + Token1 "t5", + Name1: Token1 "t6", + ], + ] + ) +} + +#[test] +fn test_spread() { + run_query_test( + &common_test_tree(), + "[Rule1 ... @x1 [Token1] ... @x2 [Token1] ...]", + query_results! { + {x1: ["t1"], x2: ["t2"]} + {x1: ["t1"], x2: ["t3"]} + {x1: ["t2"], x2: ["t3"]} + }, + ); +} + +#[test] +fn test_adjacent() { + run_query_test( + &common_test_tree(), + "[Rule1 ... @y1 [Token1] @y2 [Token1] ...]", + query_results! { + {y1: ["t1"], y2: ["t2"]} + {y1: ["t2"], y2: ["t3"]} + }, + ); +} + +#[test] +fn test_child() { + run_query_test( + &common_test_tree(), + "[Rule2 ... @x [Token1] ...]", + query_results! { + {x: ["t5"]} + {x: ["t6"]} + }, + ); +} + +#[test] +fn test_parent_and_child() { + run_query_test( + &common_test_tree(), + "[Rule1 ... @p [Name1:_] ... [Rule2 ... @c [Token1] ...]]", + query_results! { + {c: ["t5"], p: ["t1"]} + {c: ["t6"], p: ["t1"]} + }, + ); +} + +#[test] +fn test_named() { + run_query_test( + &common_test_tree(), + "[Rule1 ... @x [Name1:Token1] ...]", + query_results! { + {x: ["t1"]} + }, + ); +} + +#[test] +fn test_multilevel_adjacent() { + run_query_test( + &common_test_tree(), + "[_ ... @x [Token1] @y [Token1] ...]", + query_results! { + {x: ["t1"], y: ["t2"]} + {x: ["t2"], y: ["t3"]} + {x: ["t5"], y: ["t6"]} + }, + ); +} + +#[test] +fn test_multilevel_named() { + run_query_test( + &common_test_tree(), + "[_ ... @x [Name1:_] ...]", + query_results! 
{ + {x: ["t1"]} + {x: ["t6"]} + }, + ); +} + +#[test] +fn test_text_value() { + run_query_test( + &common_test_tree(), + r#"[Rule1 ... @z1 [Token1] ["t2"] @z2 [Token1] ...]"#, + query_results! { + {z1: ["t1"], z2: ["t3"]} + }, + ); +} + +#[test] +fn test_one_or_more() { + run_query_test( + &common_test_tree(), + "[Rule1 ... (@x [Token1])+ [_] ]", + query_results! { + {x: ["t1", "t2", "t3"]} + {x: ["t2", "t3"]} + {x: ["t3"]} + }, + ); +} + +#[test] +fn test_zero_or_more() { + run_query_test( + &common_test_tree(), + "[Rule1 ... (@y [Token1])* [_] ]", + query_results! { + {y: ["t1", "t2", "t3"]} + {y: ["t2", "t3"]} + {y: ["t3"]} + {} + }, + ); +} + +#[test] +fn test_optional() { + run_query_test( + &common_test_tree(), + "[Rule1 ... (@z [Token1])? [_] ]", + query_results! { + {z: ["t3"]} + {} + }, + ); +} diff --git a/crates/codegen/parser/runtime/src/query/mod.rs b/crates/codegen/parser/runtime/src/query/mod.rs new file mode 100644 index 0000000000..d0375c481f --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/mod.rs @@ -0,0 +1,9 @@ +mod engine; +pub mod model; +mod parser; + +#[cfg(test)] +mod engine_tests; + +#[cfg(test)] +mod parser_tests; diff --git a/crates/codegen/parser/runtime/src/query/mod_for_destination.rs b/crates/codegen/parser/runtime/src/query/mod_for_destination.rs new file mode 100644 index 0000000000..7f5222dc6d --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/mod_for_destination.rs @@ -0,0 +1,3 @@ +mod engine; +pub mod model; +mod parser; diff --git a/crates/codegen/parser/runtime/src/query/model.rs b/crates/codegen/parser/runtime/src/query/model.rs new file mode 100644 index 0000000000..e78ba1805e --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/model.rs @@ -0,0 +1,172 @@ +use std::fmt; +use std::rc::Rc; + +// This crate is copied to another crate, so all imports should be relative +use super::super::kinds::{FieldName, RuleKind, TokenKind}; + +#[derive(Clone)] +pub struct Query(pub(super) Matcher); + +impl Query { + 
pub fn parse(text: &str) -> Result { + Matcher::parse(text).map(Self) + } +} + +impl fmt::Display for Query { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Clone)] +pub(super) enum Matcher { + Binding(Rc), + Node(Rc), + Optional(Rc), + Alternatives(Rc), + Sequence(Rc), + OneOrMore(Rc), + Ellipsis, +} + +impl Matcher { + fn parse(text: &str) -> Result { + super::parser::parse_query(text) + } +} + +impl fmt::Display for Matcher { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Binding(binding) => { + write!(f, "@{} {}", binding.name, binding.child) + } + Self::Node(node) => { + if let Some(child) = &node.child { + write!(f, "[{} {}]", node.node_selector, child) + } else { + write!(f, "[{}]", node.node_selector) + } + } + Self::Optional(optional) => { + write!(f, "({})?", optional.child) + } + Self::Alternatives(alternatives) => { + let mut done_first = false; + write!(f, "(")?; + for a in &alternatives.children { + if done_first { + write!(f, " | ")?; + } else { + done_first = true; + }; + a.fmt(f)?; + } + write!(f, ")")?; + Ok(()) + } + Self::Sequence(sequence) => { + let mut done_first = false; + for a in &sequence.children { + if done_first { + write!(f, " ")?; + } else { + done_first = true; + }; + a.fmt(f)?; + } + Ok(()) + } + Self::OneOrMore(one_or_more) => { + write!(f, "({})+", one_or_more.child) + } + Self::Ellipsis => write!(f, "..."), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq)] +pub(super) enum Kind { + Rule(RuleKind), + Token(TokenKind), +} + +impl fmt::Display for Kind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Kind::Rule(rule) => write!(f, "{rule}"), + Kind::Token(token) => write!(f, "{token}"), + } + } +} + +#[derive(Clone)] +pub(super) enum NodeSelector { + Anonymous, + Kind { kind: Kind }, + Text { text: String }, + FieldName { field_name: FieldName }, + FieldNameAndKind { field_name: FieldName, kind: Kind }, + 
FieldNameAndText { field_name: FieldName, text: String }, +} + +impl fmt::Display for NodeSelector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn escape_string(string: &str) -> String { + string + .chars() + .map(|c| match c { + '"' => "\\\"".to_string(), + '\\' => "\\\\".to_string(), + '\n' => "\\n".to_string(), + '\r' => "\\r".to_string(), + '\t' => "\\t".to_string(), + '\u{08}' => "\\b".to_string(), + '\u{0c}' => "\\f".to_string(), + _ if c.is_ascii_graphic() => c.to_string(), + _ => format!("\\u{{{:x}}}", c as u32), + }) + .collect::<String>() + } + + match self { + Self::Anonymous => write!(f, "_"), + Self::Kind { kind } => kind.fmt(f), + Self::Text { text } => write!(f, "\"{}\"", escape_string(text)), + Self::FieldName { field_name } => field_name.fmt(f), + Self::FieldNameAndKind { field_name, kind } => { + write!(f, "{field_name}: {kind}") + } + Self::FieldNameAndText { field_name, text } => { + write!(f, "{field_name}: \"{}\"", escape_string(text)) + } + } + } +} + +pub(super) struct BindingMatcher { + pub name: String, + pub child: Matcher, +} + +pub(super) struct NodeMatcher { + pub node_selector: NodeSelector, + pub child: Option<Matcher>, +} + +pub(super) struct SequenceMatcher { + pub children: Vec<Matcher>, +} + +pub(super) struct AlternativesMatcher { + pub children: Vec<Matcher>, +} + +pub(super) struct OptionalMatcher { + pub child: Matcher, +} + +pub(super) struct OneOrMoreMatcher { + pub child: Matcher, +} diff --git a/crates/codegen/parser/runtime/src/query/parser.rs b/crates/codegen/parser/runtime/src/query/parser.rs new file mode 100644 index 0000000000..66823d20b4 --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/parser.rs @@ -0,0 +1,269 @@ +use std::rc::Rc; + +use nom::branch::alt; +use nom::bytes::complete::{is_not, tag, take_while, take_while1, take_while_m_n}; +use nom::character::complete::{char, multispace0, multispace1, satisfy}; +use nom::combinator::{all_consuming, map_opt, map_res, opt, recognize, value, verify}; +use
nom::error::VerboseError; +use nom::multi::{fold_many0, many0, many1}; +use nom::sequence::{delimited, pair, preceded, terminated}; +use nom::{Finish, IResult, Parser}; + +// This crate is copied to another crate, so all imports should be relative +use super::super::kinds::{FieldName, RuleKind, TokenKind}; +use super::model::{ + AlternativesMatcher, BindingMatcher, Kind, Matcher, NodeMatcher, NodeSelector, + OneOrMoreMatcher, OptionalMatcher, SequenceMatcher, +}; + +pub(super) fn parse_query(input: &str) -> Result { + all_consuming(preceded( + multispace0, + opt(binding_name_token) + .and(alt(( + parse_node, + delimited( + token('('), + pair(parse_node, many1(preceded(token('|'), parse_node))), + token(')'), + ) + .map(|(first, rest)| { + let mut children = vec![first]; + children.extend(rest); + Matcher::Alternatives(Rc::new(AlternativesMatcher { children })) + }), + ))) + .map(|(binding_name, child)| { + if let Some(name) = binding_name { + Matcher::Binding(Rc::new(BindingMatcher { name, child })) + } else { + child + } + }), + )) + .parse(input) + .finish() + .map(|(_, query)| query) + .map_err(|e| e.to_string()) +} + +fn parse_node(i: &str) -> IResult<&str, Matcher, VerboseError<&str>> { + delimited( + token('['), + parse_node_selector.and(many0(parse_match)), + token(']'), + ) + .map(|(id, mut children)| { + let child = if children.is_empty() { + None + } else if children.len() == 1 { + Some(children.pop().unwrap()) + } else { + Some(Matcher::Sequence(Rc::new(SequenceMatcher { children }))) + }; + Matcher::Node(Rc::new(NodeMatcher { + node_selector: id, + child, + })) + }) + .parse(i) +} + +fn parse_node_selector(input: &str) -> IResult<&str, NodeSelector, VerboseError<&str>> { + enum Tail { + Anonymous, + Kind(Kind), + Text(String), + } + + opt(field_name_token) + .and(alt(( + token('_').map(|_| Tail::Anonymous), + kind_token.map(Tail::Kind), + text_token.map(Tail::Text), + ))) + .map(|(field_name, tail)| match (field_name, tail) { + (None, Tail::Anonymous) 
=> NodeSelector::Anonymous, + (None, Tail::Kind(kind)) => NodeSelector::Kind { kind }, + (None, Tail::Text(string)) => NodeSelector::Text { text: string }, + (Some(field), Tail::Anonymous) => NodeSelector::FieldName { field_name: field }, + (Some(field), Tail::Kind(kind)) => NodeSelector::FieldNameAndKind { + field_name: field, + kind, + }, + (Some(field), Tail::Text(string)) => NodeSelector::FieldNameAndText { + field_name: field, + text: string, + }, + }) + .parse(input) +} + +#[derive(Clone)] +enum Quantifier { + ZeroOrOne, + ZeroOrMore, + OneOrMore, +} + +fn parse_match(input: &str) -> IResult<&str, Matcher, VerboseError<&str>> { + opt(binding_name_token) + .and(alt(( + parse_node, + pair( + delimited(token('('), many1(parse_match), token(')')), + parse_trailing_quantifier, + ) + .map(|(mut children, quantifier)| { + let child = if children.len() == 1 { + children.pop().unwrap() + } else { + Matcher::Sequence(Rc::new(SequenceMatcher { children })) + }; + match quantifier { + Quantifier::ZeroOrOne => Matcher::Optional(Rc::new(OptionalMatcher { child })), + Quantifier::ZeroOrMore => Matcher::Optional(Rc::new(OptionalMatcher { + child: Matcher::OneOrMore(Rc::new(OneOrMoreMatcher { child })), + })), + Quantifier::OneOrMore => { + Matcher::OneOrMore(Rc::new(OneOrMoreMatcher { child })) + } + } + }), + delimited( + token('('), + pair(parse_match, many1(preceded(token('|'), parse_match))), + token(')'), + ) + .map(|(first, rest)| { + let mut children = vec![first]; + children.extend(rest); + Matcher::Alternatives(Rc::new(AlternativesMatcher { children })) + }), + ellipsis_token.map(|_| Matcher::Ellipsis), + ))) + .map(|(binding, child)| { + if let Some(name) = binding { + Matcher::Binding(Rc::new(BindingMatcher { name, child })) + } else { + child + } + }) + .parse(input) +} + +fn parse_trailing_quantifier(i: &str) -> IResult<&str, Quantifier, VerboseError<&str>> { + alt(( + value(Quantifier::ZeroOrOne, token('?')), + value(Quantifier::ZeroOrMore, token('*')), + 
value(Quantifier::OneOrMore, token('+')), + )) + .parse(i) +} + +fn raw_identifier(i: &str) -> IResult<&str, String, VerboseError<&str>> { + let identifier_head = satisfy(|c| c.is_alphabetic()); + let is_identifier_tail = |c: char| c == '_' || c.is_alphanumeric(); + recognize(alt(( + // single underscore is the anonymous syntax item, + // so we don't allow it as an identifier + char('_').and(take_while1(is_identifier_tail)), + identifier_head.and(take_while(is_identifier_tail)), + ))) + .map(|s: &str| s.to_string()) + .parse(i) +} + +fn binding_name_token(i: &str) -> IResult<&str, String, VerboseError<&str>> { + terminated(preceded(char('@'), raw_identifier), multispace0).parse(i) +} + +fn kind_token(i: &str) -> IResult<&str, Kind, VerboseError<&str>> { + terminated(raw_identifier, multispace0) + .map(|id| { + TokenKind::try_from(id.as_str()) + .map(Kind::Token) + .or_else(|_| RuleKind::try_from(id.as_str()).map(Kind::Rule)) + .unwrap() // TODO + }) + .parse(i) +} + +fn field_name_token(i: &str) -> IResult<&str, FieldName, VerboseError<&str>> { + terminated(raw_identifier, token(':')) + .map(|id| FieldName::try_from(id.as_str()).unwrap()) + .parse(i) +} + +fn text_token(i: &str) -> IResult<&str, String, VerboseError<&str>> { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + enum Fragment<'a> { + EscapedChar(char), + SwallowedWhitespace, + UnescapedSequence(&'a str), + } + + let escaped_char = preceded( + char('\\'), + alt(( + map_opt( + map_res( + preceded( + char('u'), + delimited( + char('{'), + // 1 to 6 hex digits + take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()), + char('}'), + ), + ), + // converted from hex + move |hex| u32::from_str_radix(hex, 16), + ), + // converted to a char + std::char::from_u32, + ), + value('\n', char('n')), + value('\r', char('r')), + value('\t', char('t')), + value('\u{08}', char('b')), + value('\u{0C}', char('f')), + value('\\', char('\\')), + value('"', char('"')), + )), + ) + .map(Fragment::EscapedChar); + + // any amount of 
whitespace, collapsed to nothing + let swallowed_whitespace = value( + Fragment::SwallowedWhitespace, + preceded(char('\\'), multispace1), + ); + + let unescaped_sequence = + verify(is_not("\"\\"), |s: &str| !s.is_empty()).map(Fragment::UnescapedSequence); + + let fragment = alt((unescaped_sequence, escaped_char, swallowed_whitespace)); + + delimited( + char('"'), + fold_many0(fragment, String::new, |mut string, fragment| { + match fragment { + Fragment::EscapedChar(c) => string.push(c), + Fragment::SwallowedWhitespace => {} + Fragment::UnescapedSequence(s) => string.push_str(s), + } + string + }), + char('"'), + ) + .parse(i) +} + +fn ellipsis_token(i: &str) -> IResult<&str, &str, VerboseError<&str>> { + terminated(tag("..."), multispace0).parse(i) +} + +fn token<'input>(c: char) -> impl Parser<&'input str, char, VerboseError<&'input str>> { + terminated(char(c), multispace0) +} diff --git a/crates/codegen/parser/runtime/src/query/parser_tests.rs b/crates/codegen/parser/runtime/src/query/parser_tests.rs new file mode 100644 index 0000000000..072c74ddc4 --- /dev/null +++ b/crates/codegen/parser/runtime/src/query/parser_tests.rs @@ -0,0 +1,52 @@ +use super::model::Query; + +fn run_parser_test(input: &str, result: &str) { + assert_eq!(Query::parse(input).unwrap().to_string(), result); +} + +#[test] +fn test_text_escaping() { + run_parser_test( + r#"["abc\\\"\n\r\b\t\u{01abcd}\ + "]"#, + r#"["abc\\\"\n\r\b\t\u{1abcd}"]"#, + ); +} + +#[test] +fn test_ellipsis() { + run_parser_test(r#"[_ ...]"#, r#"[_ ...]"#); +} + +#[test] +fn test_anonymous() { + run_parser_test(r#"[_]"#, r#"[_]"#); +} + +#[test] +fn test_root_binding() { + run_parser_test(r#"@root [Token1]"#, r#"@root [Token1]"#); +} + +#[test] +fn test_binding() { + run_parser_test(r#"[Rule1 @b [Token1]]"#, r#"[Rule1 @b [Token1]]"#); +} + +#[test] +fn test_zero_or_more_canonicalisation() { + run_parser_test(r#"[Rule1 ([Rule2])*]"#, r#"[Rule1 (([Rule2])+)?]"#); +} + +// Test the error message on parse failure 
+#[test] +fn test_parsing_error() { + let result = Query::parse(r#"@root [_ ..."#); + match result { + Ok(_) => panic!("Expected error"), + Err(e) => assert_eq!( + e.to_string(), + "Parse error:\nexpected '(' at: [_ ...\nAlt at: [_ ...\n" + ), + } +} diff --git a/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 b/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 index a6df6f5367..bb9c136133 100644 --- a/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 +++ b/crates/codegen/parser/runtime/src/templates/language.rs.jinja2 @@ -17,9 +17,9 @@ use crate::kinds::{ }; use crate::lexer::{KeywordScan, Lexer, ScannedToken}; #[cfg(feature = "slang_napi_interfaces")] -use crate::napi::napi_parse_output::ParseOutput as NAPIParseOutput; +use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput; use crate::parse_output::ParseOutput; -use crate::support::{ +use crate::parser_support::{ ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult, PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper, }; diff --git a/crates/codegen/parser/runtime/src/visitor.rs b/crates/codegen/parser/runtime/src/visitor.rs deleted file mode 100644 index 8b13789179..0000000000 --- a/crates/codegen/parser/runtime/src/visitor.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/crates/solidity/outputs/cargo/crate/Cargo.toml b/crates/solidity/outputs/cargo/crate/Cargo.toml index 6dab111b05..60e677fc86 100644 --- a/crates/solidity/outputs/cargo/crate/Cargo.toml +++ b/crates/solidity/outputs/cargo/crate/Cargo.toml @@ -45,6 +45,7 @@ solidity_language = { workspace = true } # __REMOVE_THIS_LINE_DURING_CARG anyhow = { workspace = true, optional = true } ariadne = { workspace = true } clap = { workspace = true, optional = true } +nom = { workspace = true } semver = { workspace = true } serde = { workspace = true } serde_json = { workspace = true, optional = true } diff --git 
a/crates/solidity/outputs/cargo/crate/src/generated/language.rs b/crates/solidity/outputs/cargo/crate/src/generated/language.rs index 1fce227052..66d568b83f 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/language.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/language.rs @@ -19,9 +19,9 @@ use crate::kinds::{ }; use crate::lexer::{KeywordScan, Lexer, ScannedToken}; #[cfg(feature = "slang_napi_interfaces")] -use crate::napi::napi_parse_output::ParseOutput as NAPIParseOutput; +use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput; use crate::parse_output::ParseOutput; -use crate::support::{ +use crate::parser_support::{ ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult, PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper, }; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/lexer.rs b/crates/solidity/outputs/cargo/crate/src/generated/lexer.rs index 4cece5532e..61b71dd0c1 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/lexer.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/lexer.rs @@ -2,7 +2,7 @@ use crate::cst::{self, NamedNode}; use crate::kinds::{IsLexicalContext, TokenKind}; -use crate::support::{ParserContext, ParserResult}; +use crate::parser_support::{ParserContext, ParserResult}; /// Whether a keyword has been scanned and if so, whether it is reserved (unusable as an identifier) /// or not. diff --git a/crates/solidity/outputs/cargo/crate/src/generated/mod.rs b/crates/solidity/outputs/cargo/crate/src/generated/mod.rs index aa488c9b16..c782bddb63 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/mod.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/mod.rs @@ -1,7 +1,7 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. 
#[macro_use] -pub mod support; +pub mod parser_support; pub mod cst; pub mod cursor; @@ -10,7 +10,8 @@ pub mod language; pub(crate) mod lexer; pub mod parse_error; pub mod parse_output; +pub mod query; pub mod text_index; #[cfg(feature = "slang_napi_interfaces")] -pub mod napi; +pub mod napi_interface; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_ast_selectors.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/ast_selectors.rs similarity index 99% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/napi_ast_selectors.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/ast_selectors.rs index 93fe762cc7..793425de75 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_ast_selectors.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/ast_selectors.rs @@ -7,8 +7,8 @@ use std::rc::Rc; use napi::{Env, JsObject}; use napi_derive::napi; -use crate::napi::napi_cst::{RuleNode, ToJS}; -use crate::napi::{RuleKind, RustNamedNode, RustNode, RustRuleNode, TokenKind}; +use crate::napi_interface::cst::{RuleNode, ToJS}; +use crate::napi_interface::{RuleKind, RustNamedNode, RustNode, RustRuleNode, TokenKind}; // // Sequences: diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_cst.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/cst.rs similarity index 94% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/napi_cst.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/cst.rs index bf890e4bca..e8dcae0dfc 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_cst.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/cst.rs @@ -6,9 +6,11 @@ use napi::bindgen_prelude::{Env, ToNapiValue}; use napi::{JsObject, NapiValue}; use napi_derive::napi; -use crate::napi::napi_cursor::Cursor; -use crate::napi::napi_text_index::TextIndex; -use 
crate::napi::{RuleKind, RustNode, RustRuleNode, RustTextIndex, RustTokenNode, TokenKind}; +use crate::napi_interface::cursor::Cursor; +use crate::napi_interface::text_index::TextIndex; +use crate::napi_interface::{ + RuleKind, RustNode, RustRuleNode, RustTextIndex, RustTokenNode, TokenKind, +}; #[napi(namespace = "cst")] pub enum NodeType { diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_cursor.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/cursor.rs similarity index 97% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/napi_cursor.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/cursor.rs index e2e91d1d6f..00d10f1c64 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_cursor.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/cursor.rs @@ -5,13 +5,13 @@ // The functions are meant to be definitions for export, so they're not really used #![allow(clippy::return_self_not_must_use)] +use cst::ToJS; use napi::bindgen_prelude::Env; use napi::JsObject; -use napi_cst::ToJS; use napi_derive::napi; -use napi_text_index::{TextIndex, TextRange}; +use text_index::{TextIndex, TextRange}; -use crate::napi::{napi_cst, napi_text_index, FieldName, RuleKind, RustCursor, TokenKind}; +use crate::napi_interface::{cst, text_index, FieldName, RuleKind, RustCursor, TokenKind}; #[napi(namespace = "cursor")] pub struct Cursor(Box); diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/mod.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/mod.rs similarity index 81% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/mod.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/mod.rs index 3faae26f23..d065d5deec 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/mod.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/mod.rs @@ -1,11 +1,11 @@ // This 
file is generated automatically by infrastructure scripts. Please don't edit by hand. -pub mod napi_ast_selectors; -pub mod napi_cst; -pub mod napi_cursor; -pub mod napi_parse_error; -pub mod napi_parse_output; -pub mod napi_text_index; +pub mod ast_selectors; +pub mod cst; +pub mod cursor; +pub mod parse_error; +pub mod parse_output; +pub mod text_index; type RustCursor = crate::cursor::Cursor; type RustNamedNode = crate::cst::NamedNode; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_parse_error.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/parse_error.rs similarity index 90% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/napi_parse_error.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/parse_error.rs index bf6f7e7e34..ed79439b9e 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_parse_error.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/parse_error.rs @@ -4,9 +4,9 @@ #![allow(clippy::needless_pass_by_value)] use napi_derive::napi; -use napi_text_index::TextRange; +use text_index::TextRange; -use crate::napi::{napi_text_index, RustParseError}; +use crate::napi_interface::{text_index, RustParseError}; #[napi(namespace = "parse_error")] #[derive(PartialEq, Clone)] diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_parse_output.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/parse_output.rs similarity index 81% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/napi_parse_output.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/parse_output.rs index 95c3c5fadb..a2c4c13600 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_parse_output.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/parse_output.rs @@ -1,10 +1,10 @@ // This file is generated automatically by infrastructure scripts. 
Please don't edit by hand. +use cst::ToJS; use napi::bindgen_prelude::Env; -use napi_cst::ToJS; use napi_derive::napi; -use crate::napi::{napi_cst, napi_cursor, napi_parse_error, RustParseOutput}; +use crate::napi_interface::{cst, cursor, parse_error, RustParseOutput}; #[napi(namespace = "parse_output")] pub struct ParseOutput(RustParseOutput); @@ -23,7 +23,7 @@ impl ParseOutput { } #[napi(ts_return_type = "Array", catch_unwind)] - pub fn errors(&self) -> Vec { + pub fn errors(&self) -> Vec { self.0.errors().iter().map(|x| x.clone().into()).collect() } @@ -34,7 +34,7 @@ impl ParseOutput { /// Creates a cursor that starts at the root of the parse tree. #[napi(ts_return_type = "cursor.Cursor", catch_unwind)] - pub fn create_tree_cursor(&self) -> napi_cursor::Cursor { + pub fn create_tree_cursor(&self) -> cursor::Cursor { self.0.create_tree_cursor().into() } } diff --git a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_text_index.rs b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/text_index.rs similarity index 95% rename from crates/solidity/outputs/cargo/crate/src/generated/napi/napi_text_index.rs rename to crates/solidity/outputs/cargo/crate/src/generated/napi_interface/text_index.rs index b88350c8ed..9cb6667c1a 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/napi/napi_text_index.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/napi_interface/text_index.rs @@ -2,7 +2,7 @@ use napi_derive::napi; -use crate::napi::{RustTextIndex, RustTextRange}; +use crate::napi_interface::{RustTextIndex, RustTextRange}; #[napi(object, namespace = "text_index")] #[derive(Copy, Clone)] diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/choice_helper.rs similarity index 98% rename from crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs rename to 
crates/solidity/outputs/cargo/crate/src/generated/parser_support/choice_helper.rs index 98e4de2dc3..e24443c826 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/choice_helper.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/choice_helper.rs @@ -6,8 +6,8 @@ use std::ops::ControlFlow; use crate::cst; use crate::kinds::TokenKind; use crate::parse_error::ParseError; -use crate::support::context::{Marker, ParserContext}; -use crate::support::ParserResult; +use crate::parser_support::context::{Marker, ParserContext}; +use crate::parser_support::ParserResult; use crate::text_index::TextIndex; /// Starting from a given position in the input, this helper will try to pick (and remember) a best match. Settles on diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/context.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/context.rs similarity index 100% rename from crates/solidity/outputs/cargo/crate/src/generated/support/context.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/context.rs diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/mod.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/mod.rs similarity index 100% rename from crates/solidity/outputs/cargo/crate/src/generated/support/mod.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/mod.rs diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/optional_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/optional_helper.rs similarity index 93% rename from crates/solidity/outputs/cargo/crate/src/generated/support/optional_helper.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/optional_helper.rs index 66649e0ac5..dd933143b6 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/optional_helper.rs +++ 
b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/optional_helper.rs @@ -1,6 +1,6 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. -use crate::support::parser_result::ParserResult; +use crate::parser_support::parser_result::ParserResult; pub struct OptionalHelper; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/parser_function.rs similarity index 97% rename from crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/parser_function.rs index e4a4493f32..41cc144145 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_function.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/parser_function.rs @@ -7,8 +7,8 @@ use crate::kinds::TokenKind; use crate::lexer::Lexer; use crate::parse_error::ParseError; use crate::parse_output::ParseOutput; -use crate::support::context::ParserContext; -use crate::support::parser_result::{IncompleteMatch, Match, ParserResult, SkippedUntil}; +use crate::parser_support::context::ParserContext; +use crate::parser_support::parser_result::{IncompleteMatch, Match, ParserResult, SkippedUntil}; use crate::text_index::TextIndex; pub trait ParserFunction diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/parser_result.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/parser_result.rs similarity index 100% rename from crates/solidity/outputs/cargo/crate/src/generated/support/parser_result.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/parser_result.rs diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/precedence_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/precedence_helper.rs similarity index 97% rename from 
crates/solidity/outputs/cargo/crate/src/generated/support/precedence_helper.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/precedence_helper.rs index 2cb544f4c3..9a7ff657e8 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/precedence_helper.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/precedence_helper.rs @@ -2,8 +2,10 @@ use crate::cst::{self, NamedNode}; use crate::kinds::{FieldName, RuleKind}; -use crate::support::parser_result::PrattElement::{self, Binary, Expression, Postfix, Prefix}; -use crate::support::parser_result::{ParserResult, PrattOperatorMatch}; +use crate::parser_support::parser_result::PrattElement::{ + self, Binary, Expression, Postfix, Prefix, +}; +use crate::parser_support::parser_result::{ParserResult, PrattOperatorMatch}; pub struct PrecedenceHelper; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/recovery.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/recovery.rs similarity index 97% rename from crates/solidity/outputs/cargo/crate/src/generated/support/recovery.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/recovery.rs index d5976fff49..2584e20915 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/recovery.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/recovery.rs @@ -4,9 +4,9 @@ use crate::cst; use crate::kinds::{IsLexicalContext, TokenKind}; use crate::lexer::{Lexer, ScannedToken}; use crate::parse_error::ParseError; -use crate::support::context::ParserContext; -use crate::support::parser_result::SkippedUntil; -use crate::support::ParserResult; +use crate::parser_support::context::ParserContext; +use crate::parser_support::parser_result::SkippedUntil; +use crate::parser_support::ParserResult; use crate::text_index::{TextRange, TextRangeExtensions as _}; /// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] 
method. diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/repetition_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/repetition_helper.rs similarity index 96% rename from crates/solidity/outputs/cargo/crate/src/generated/support/repetition_helper.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/repetition_helper.rs index 07a7a666a9..8bde0a1fa9 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/repetition_helper.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/repetition_helper.rs @@ -1,7 +1,7 @@ // This file is generated automatically by infrastructure scripts. Please don't edit by hand. -use crate::support::context::ParserContext; -use crate::support::parser_result::{IncompleteMatch, NoMatch, ParserResult}; +use crate::parser_support::context::ParserContext; +use crate::parser_support::parser_result::{IncompleteMatch, NoMatch, ParserResult}; pub struct RepetitionHelper; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/scanner_macros.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/scanner_macros.rs similarity index 100% rename from crates/solidity/outputs/cargo/crate/src/generated/support/scanner_macros.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/scanner_macros.rs diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/separated_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/separated_helper.rs similarity index 96% rename from crates/solidity/outputs/cargo/crate/src/generated/support/separated_helper.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/separated_helper.rs index 43b00a078c..b1fdee1954 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/separated_helper.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/separated_helper.rs @@ -4,9 +4,9 @@ use 
crate::cst::{self, NamedNode}; use crate::kinds::{FieldName, IsLexicalContext, TokenKind}; use crate::lexer::Lexer; use crate::parse_error::ParseError; -use crate::support::parser_result::{ParserResult, SkippedUntil}; -use crate::support::recovery::skip_until_with_nested_delims; -use crate::support::ParserContext; +use crate::parser_support::parser_result::{ParserResult, SkippedUntil}; +use crate::parser_support::recovery::skip_until_with_nested_delims; +use crate::parser_support::ParserContext; use crate::text_index::TextRangeExtensions; pub struct SeparatedHelper; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/support/sequence_helper.rs b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/sequence_helper.rs similarity index 99% rename from crates/solidity/outputs/cargo/crate/src/generated/support/sequence_helper.rs rename to crates/solidity/outputs/cargo/crate/src/generated/parser_support/sequence_helper.rs index ced41b9732..61d8a31a94 100644 --- a/crates/solidity/outputs/cargo/crate/src/generated/support/sequence_helper.rs +++ b/crates/solidity/outputs/cargo/crate/src/generated/parser_support/sequence_helper.rs @@ -4,7 +4,7 @@ use std::ops::ControlFlow; use crate::cst::{self, NamedNode}; use crate::kinds::{FieldName, TokenKind}; -use crate::support::parser_result::{Match, ParserResult, PrattElement, SkippedUntil}; +use crate::parser_support::parser_result::{Match, ParserResult, PrattElement, SkippedUntil}; /// Keeps accumulating parses sequentially until it hits an incomplete or no match. #[must_use] diff --git a/crates/solidity/outputs/cargo/crate/src/generated/query/engine.rs b/crates/solidity/outputs/cargo/crate/src/generated/query/engine.rs new file mode 100644 index 0000000000..82fa136c78 --- /dev/null +++ b/crates/solidity/outputs/cargo/crate/src/generated/query/engine.rs @@ -0,0 +1,480 @@ +// This file is generated automatically by infrastructure scripts. Please don't edit by hand. 
+ +use std::collections::HashMap; +use std::rc::Rc; + +// This crate is copied to another crate, so all imports should be relative +use super::super::cst; +use super::super::cursor::Cursor; +use super::model::{ + AlternativesMatcher, BindingMatcher, Kind, Matcher, NodeMatcher, NodeSelector, + OneOrMoreMatcher, OptionalMatcher, Query, SequenceMatcher, +}; + +impl Cursor { + pub fn query(self, queries: Vec) -> QueryResultIterator { + QueryResultIterator::new(self, queries) + } + + fn irrevocably_go_to_next_sibling(&mut self) -> bool { + if self.is_completed() { + false + } else { + if !self.go_to_next_sibling() { + self.complete(); + } + true + } + } + + fn matches_node_selector(&self, node_selector: &NodeSelector) -> bool { + match self.node() { + cst::Node::Rule(rule) => match node_selector { + NodeSelector::Anonymous => true, + NodeSelector::Kind { kind } => Kind::Rule(rule.kind) == *kind, + NodeSelector::Text { .. } => false, + NodeSelector::FieldName { field_name } => Some(*field_name) == self.node_name(), + NodeSelector::FieldNameAndKind { field_name, kind } => { + Some(*field_name) == self.node_name() && Kind::Rule(rule.kind) == *kind + } + NodeSelector::FieldNameAndText { .. 
} => false, + }, + + cst::Node::Token(token) => match node_selector { + NodeSelector::Anonymous => true, + NodeSelector::Kind { kind } => Kind::Token(token.kind) == *kind, + NodeSelector::Text { text } => token.text == *text, + NodeSelector::FieldName { field_name } => Some(*field_name) == self.node_name(), + NodeSelector::FieldNameAndKind { field_name, kind } => { + Some(*field_name) == self.node_name() && Kind::Token(token.kind) == *kind + } + NodeSelector::FieldNameAndText { field_name, text } => { + Some(*field_name) == self.node_name() && token.text == *text + } + }, + } + } +} + +impl Matcher { + // This allows for queries to pre-flight against a cursor without allocating + fn can_match(&self, cursor: &Cursor) -> bool { + match self { + Self::Binding(matcher) => matcher.child.can_match(cursor), + Self::Node(matcher) => cursor.matches_node_selector(&matcher.node_selector), + Self::Alternatives(matcher) => matcher.children.iter().any(|c| c.can_match(cursor)), + Self::Sequence(matcher) => matcher.children[0].can_match(cursor), + Self::OneOrMore(matcher) => matcher.child.can_match(cursor), + Self::Optional(_) => true, + Self::Ellipsis => true, + } + } + + fn create_combinator(&self, cursor: Cursor) -> CombinatorRef { + match self { + Self::Binding(matcher) => Box::new(BindingCombinator::new(matcher.clone(), cursor)), + Self::Node(matcher) => Box::new(NodeCombinator::new(matcher.clone(), cursor)), + Self::Sequence(matcher) => Box::new(SequenceCombinator::new(matcher.clone(), cursor)), + Self::Alternatives(matcher) => { + Box::new(AlternativesCombinator::new(matcher.clone(), cursor)) + } + Self::Optional(matcher) => Box::new(OptionalCombinator::new(matcher.clone(), cursor)), + Self::OneOrMore(matcher) => Box::new(OneOrMoreCombinator::new(matcher.clone(), cursor)), + Self::Ellipsis => Box::new(EllipsisCombinator::new(cursor)), + } + } +} + +pub struct QueryResult { + pub query_number: usize, + pub bindings: HashMap>, +} + +pub struct QueryResultIterator { + cursor: 
Cursor, + queries: Vec, + query_number: usize, + combinator: Option, +} + +impl QueryResultIterator { + fn new(cursor: Cursor, queries: Vec) -> Self { + Self { + cursor, + queries, + query_number: 0, + combinator: None, + } + } + + fn advance_to_next_possible_matching_query(&mut self) { + while !self.cursor.is_completed() { + while self.query_number < self.queries.len() { + let matcher = &self.queries[self.query_number].0; + if matcher.can_match(&self.cursor) { + self.combinator = Some(matcher.create_combinator(self.cursor.clone())); + return; + }; + self.query_number += 1; + } + self.cursor.go_to_next(); + self.query_number = 0; + } + } +} + +impl Iterator for QueryResultIterator { + type Item = QueryResult; + + fn next(&mut self) -> Option { + while !self.cursor.is_completed() { + if let Some(combinator) = self.combinator.as_mut() { + if combinator.next().is_some() { + let mut bindings = HashMap::new(); + combinator.accumulate_bindings(&mut bindings); + return Some(QueryResult { + query_number: self.query_number, + bindings, + }); + } + self.query_number += 1; + } + + self.advance_to_next_possible_matching_query(); + } + + None + } +} + +trait Combinator { + // None -> failed to match, you must backtrack. 
DO NOT call again + // Some(cursor) if cursor.is_complete -> matched, end of input + // Some(cursor) if !cursor.is_complete -> matched, more input to go + fn next(&mut self) -> Option; + fn accumulate_bindings(&self, bindings: &mut HashMap>); +} +type CombinatorRef = Box; + +struct BindingCombinator { + matcher: Rc, + cursor: Cursor, + child: CombinatorRef, +} + +impl BindingCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + let child = matcher.child.create_combinator(cursor.clone()); + Self { + matcher, + cursor, + child, + } + } +} + +impl Combinator for BindingCombinator { + fn next(&mut self) -> Option { + self.child.next() + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + bindings + .entry(self.matcher.name.clone()) + .or_default() + .push(self.cursor.clone()); + } +} + +struct NodeCombinator { + matcher: Rc, + child: Option, + cursor: Cursor, + is_initialised: bool, +} + +impl NodeCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + child: None, + cursor, + is_initialised: false, + } + } +} + +impl Combinator for NodeCombinator { + fn next(&mut self) -> Option { + if self.cursor.is_completed() { + return None; + } + + if !self.is_initialised { + self.is_initialised = true; + + if !self + .cursor + .matches_node_selector(&self.matcher.node_selector) + { + return None; + } + + if let Some(child) = self.matcher.child.as_ref() { + let mut child_cursor = self.cursor.clone(); + if !child_cursor.go_to_first_child() { + return None; + } + + self.child = Some(child.create_combinator(child_cursor)); + } else { + let mut return_cursor = self.cursor.clone(); + return_cursor.irrevocably_go_to_next_sibling(); + return Some(return_cursor); + } + } + + if let Some(child) = self.child.as_mut() { + while let Some(cursor) = child.as_mut().next() { + if cursor.is_completed() { + let mut return_cursor = self.cursor.clone(); + return_cursor.irrevocably_go_to_next_sibling(); + return Some(return_cursor); + } + } + 
self.child = None; + } + + None + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + if let Some(child) = self.child.as_ref() { + child.accumulate_bindings(bindings); + } + } +} + +struct SequenceCombinator { + matcher: Rc, + children: Vec, + cursor: Cursor, + is_initialised: bool, +} + +impl SequenceCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + children: vec![], + cursor, + is_initialised: false, + } + } +} + +impl Combinator for SequenceCombinator { + fn next(&mut self) -> Option { + if !self.is_initialised { + self.is_initialised = true; + + let child_cursor = self.cursor.clone(); + let child = self.matcher.children[0].create_combinator(child_cursor); + self.children.push(child); + } + + while !self.children.is_empty() { + if let Some(child_cursor) = self.children.last_mut().unwrap().next() { + if self.children.len() == self.matcher.children.len() { + return Some(child_cursor); + } + + let child = + self.matcher.children[self.children.len()].create_combinator(child_cursor); + self.children.push(child); + } else { + self.children.pop(); + } + } + + None + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + for child in &self.children { + child.accumulate_bindings(bindings); + } + } +} + +struct AlternativesCombinator { + matcher: Rc, + next_child_number: usize, + child: Option, + cursor: Cursor, +} + +impl AlternativesCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + next_child_number: 0, + child: None, + cursor, + } + } +} + +impl Combinator for AlternativesCombinator { + fn next(&mut self) -> Option { + loop { + if self.child.is_none() { + match self.matcher.children.get(self.next_child_number) { + Some(child) => { + let child = child.create_combinator(self.cursor.clone()); + self.child = Some(child); + self.next_child_number += 1; + } + None => return None, + } + } + + match self.child.as_mut().unwrap().next() { + Some(cursor) => return Some(cursor), + None => 
self.child = None, + } + } + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + self.child.as_ref().unwrap().accumulate_bindings(bindings); + } +} + +struct OptionalCombinator { + matcher: Rc, + child: Option, + cursor: Cursor, + have_nonempty_match: bool, +} + +impl OptionalCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + Self { + matcher, + child: None, + cursor, + have_nonempty_match: false, + } + } +} + +impl Combinator for OptionalCombinator { + fn next(&mut self) -> Option { + if let Some(child) = self.child.as_mut() { + match child.next() { + result @ Some(_) => { + self.have_nonempty_match = true; + result + } + None => { + self.child = None; + None + } + } + } else { + let child_cursor = self.cursor.clone(); + let child = self.matcher.child.create_combinator(child_cursor); + self.child = Some(child); + Some(self.cursor.clone()) + } + } + + fn accumulate_bindings(&self, bindings: &mut HashMap>) { + if self.have_nonempty_match { + if let Some(child) = self.child.as_ref() { + child.accumulate_bindings(bindings); + } + } + } +} + +struct OneOrMoreCombinator { + matcher: Rc, + children: Vec, + cursor_for_next_repetition: Option, +} + +impl OneOrMoreCombinator { + fn new(matcher: Rc, cursor: Cursor) -> Self { + let cursor_for_next_repetition = Some(cursor); + Self { + matcher, + children: vec![], + cursor_for_next_repetition, + } + } +} + +impl Combinator for OneOrMoreCombinator { + fn next(&mut self) -> Option { + loop { + if let Some(cursor_for_next_repetition) = self.cursor_for_next_repetition.take() { + let next_child = self + .matcher + .child + .create_combinator(cursor_for_next_repetition); + self.children.push(next_child); + } else { + let tail = self.children.last_mut().unwrap(); + if let Some(cursor) = tail.next() { + if !cursor.is_completed() { + self.cursor_for_next_repetition = Some(cursor.clone()); + } + return Some(cursor); + } + self.children.pop(); + if self.children.is_empty() { + return None; + } + } + } + } + + fn 
accumulate_bindings(&self, bindings: &mut HashMap>) { + for child in &self.children { + child.accumulate_bindings(bindings); + } + } +} + +struct EllipsisCombinator { + cursor: Cursor, + has_returned_initial_empty_value: bool, +} + +impl EllipsisCombinator { + fn new(cursor: Cursor) -> Self { + Self { + cursor, + has_returned_initial_empty_value: false, + } + } +} + +impl Combinator for EllipsisCombinator { + fn next(&mut self) -> Option { + if !self.has_returned_initial_empty_value { + self.has_returned_initial_empty_value = true; + return Some(self.cursor.clone()); + } + + if self.cursor.irrevocably_go_to_next_sibling() { + return Some(self.cursor.clone()); + } + + None + } + + fn accumulate_bindings(&self, _bindings: &mut HashMap>) {} +} diff --git a/crates/solidity/outputs/cargo/crate/src/generated/query/mod.rs b/crates/solidity/outputs/cargo/crate/src/generated/query/mod.rs new file mode 100644 index 0000000000..d9dc5fb0a0 --- /dev/null +++ b/crates/solidity/outputs/cargo/crate/src/generated/query/mod.rs @@ -0,0 +1,5 @@ +// This file is generated automatically by infrastructure scripts. Please don't edit by hand. + +mod engine; +pub mod model; +mod parser; diff --git a/crates/solidity/outputs/cargo/crate/src/generated/query/model.rs b/crates/solidity/outputs/cargo/crate/src/generated/query/model.rs new file mode 100644 index 0000000000..c3bfed9170 --- /dev/null +++ b/crates/solidity/outputs/cargo/crate/src/generated/query/model.rs @@ -0,0 +1,174 @@ +// This file is generated automatically by infrastructure scripts. Please don't edit by hand. 
+ +use std::fmt; +use std::rc::Rc; + +// This crate is copied to another crate, so all imports should be relative +use super::super::kinds::{FieldName, RuleKind, TokenKind}; + +#[derive(Clone)] +pub struct Query(pub(super) Matcher); + +impl Query { + pub fn parse(text: &str) -> Result { + Matcher::parse(text).map(Self) + } +} + +impl fmt::Display for Query { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Clone)] +pub(super) enum Matcher { + Binding(Rc), + Node(Rc), + Optional(Rc), + Alternatives(Rc), + Sequence(Rc), + OneOrMore(Rc), + Ellipsis, +} + +impl Matcher { + fn parse(text: &str) -> Result { + super::parser::parse_query(text) + } +} + +impl fmt::Display for Matcher { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Binding(binding) => { + write!(f, "@{} {}", binding.name, binding.child) + } + Self::Node(node) => { + if let Some(child) = &node.child { + write!(f, "[{} {}]", node.node_selector, child) + } else { + write!(f, "[{}]", node.node_selector) + } + } + Self::Optional(optional) => { + write!(f, "({})?", optional.child) + } + Self::Alternatives(alternatives) => { + let mut done_first = false; + write!(f, "(")?; + for a in &alternatives.children { + if done_first { + write!(f, " | ")?; + } else { + done_first = true; + }; + a.fmt(f)?; + } + write!(f, ")")?; + Ok(()) + } + Self::Sequence(sequence) => { + let mut done_first = false; + for a in &sequence.children { + if done_first { + write!(f, " ")?; + } else { + done_first = true; + }; + a.fmt(f)?; + } + Ok(()) + } + Self::OneOrMore(one_or_more) => { + write!(f, "({})+", one_or_more.child) + } + Self::Ellipsis => write!(f, "..."), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq)] +pub(super) enum Kind { + Rule(RuleKind), + Token(TokenKind), +} + +impl fmt::Display for Kind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Kind::Rule(rule) => write!(f, "{rule}"), + Kind::Token(token) => 
write!(f, "{token}"), + } + } +} + +#[derive(Clone)] +pub(super) enum NodeSelector { + Anonymous, + Kind { kind: Kind }, + Text { text: String }, + FieldName { field_name: FieldName }, + FieldNameAndKind { field_name: FieldName, kind: Kind }, + FieldNameAndText { field_name: FieldName, text: String }, +} + +impl fmt::Display for NodeSelector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn escape_string(string: &str) -> String { + string + .chars() + .map(|c| match c { + '"' => "\\\"".to_string(), + '\\' => "\\\\".to_string(), + '\n' => "\\n".to_string(), + '\r' => "\\r".to_string(), + '\t' => "\\t".to_string(), + '\u{08}' => "\\b".to_string(), + '\u{0c}' => "\\f".to_string(), + _ if c.is_ascii_graphic() => c.to_string(), + _ => format!("\\u{{{:x}}}", c as u32), + }) + .collect::() + } + + match self { + Self::Anonymous => write!(f, "_"), + Self::Kind { kind } => kind.fmt(f), + Self::Text { text } => write!(f, "\"{}\"", escape_string(text)), + Self::FieldName { field_name } => field_name.fmt(f), + Self::FieldNameAndKind { field_name, kind } => { + write!(f, "{field_name}; {kind}") + } + Self::FieldNameAndText { field_name, text } => { + write!(f, "{field_name}: \"{}\"", escape_string(text)) + } + } + } +} + +pub(super) struct BindingMatcher { + pub name: String, + pub child: Matcher, +} + +pub(super) struct NodeMatcher { + pub node_selector: NodeSelector, + pub child: Option, +} + +pub(super) struct SequenceMatcher { + pub children: Vec, +} + +pub(super) struct AlternativesMatcher { + pub children: Vec, +} + +pub(super) struct OptionalMatcher { + pub child: Matcher, +} + +pub(super) struct OneOrMoreMatcher { + pub child: Matcher, +} diff --git a/crates/solidity/outputs/cargo/crate/src/generated/query/parser.rs b/crates/solidity/outputs/cargo/crate/src/generated/query/parser.rs new file mode 100644 index 0000000000..cb867fb2a3 --- /dev/null +++ b/crates/solidity/outputs/cargo/crate/src/generated/query/parser.rs @@ -0,0 +1,271 @@ +// This file is 
generated automatically by infrastructure scripts. Please don't edit by hand. + +use std::rc::Rc; + +use nom::branch::alt; +use nom::bytes::complete::{is_not, tag, take_while, take_while1, take_while_m_n}; +use nom::character::complete::{char, multispace0, multispace1, satisfy}; +use nom::combinator::{all_consuming, map_opt, map_res, opt, recognize, value, verify}; +use nom::error::VerboseError; +use nom::multi::{fold_many0, many0, many1}; +use nom::sequence::{delimited, pair, preceded, terminated}; +use nom::{Finish, IResult, Parser}; + +// This crate is copied to another crate, so all imports should be relative +use super::super::kinds::{FieldName, RuleKind, TokenKind}; +use super::model::{ + AlternativesMatcher, BindingMatcher, Kind, Matcher, NodeMatcher, NodeSelector, + OneOrMoreMatcher, OptionalMatcher, SequenceMatcher, +}; + +pub(super) fn parse_query(input: &str) -> Result { + all_consuming(preceded( + multispace0, + opt(binding_name_token) + .and(alt(( + parse_node, + delimited( + token('('), + pair(parse_node, many1(preceded(token('|'), parse_node))), + token(')'), + ) + .map(|(first, rest)| { + let mut children = vec![first]; + children.extend(rest); + Matcher::Alternatives(Rc::new(AlternativesMatcher { children })) + }), + ))) + .map(|(binding_name, child)| { + if let Some(name) = binding_name { + Matcher::Binding(Rc::new(BindingMatcher { name, child })) + } else { + child + } + }), + )) + .parse(input) + .finish() + .map(|(_, query)| query) + .map_err(|e| e.to_string()) +} + +fn parse_node(i: &str) -> IResult<&str, Matcher, VerboseError<&str>> { + delimited( + token('['), + parse_node_selector.and(many0(parse_match)), + token(']'), + ) + .map(|(id, mut children)| { + let child = if children.is_empty() { + None + } else if children.len() == 1 { + Some(children.pop().unwrap()) + } else { + Some(Matcher::Sequence(Rc::new(SequenceMatcher { children }))) + }; + Matcher::Node(Rc::new(NodeMatcher { + node_selector: id, + child, + })) + }) + .parse(i) +} + +fn 
/// Repetition suffix that may follow a parenthesised group in a query.
///
/// Produced by `parse_trailing_quantifier` and consumed by `parse_match`, which turns it
/// into `Optional` / `OneOrMore` matchers.
#[derive(Clone)]
enum Quantifier {
    /// `?`: becomes `Matcher::Optional`.
    ZeroOrOne,
    /// `*`: becomes `Matcher::Optional` wrapping `Matcher::OneOrMore`.
    ZeroOrMore,
    /// `+`: becomes `Matcher::OneOrMore`.
    OneOrMore,
}
Matcher::Ellipsis), + ))) + .map(|(binding, child)| { + if let Some(name) = binding { + Matcher::Binding(Rc::new(BindingMatcher { name, child })) + } else { + child + } + }) + .parse(input) +} + +fn parse_trailing_quantifier(i: &str) -> IResult<&str, Quantifier, VerboseError<&str>> { + alt(( + value(Quantifier::ZeroOrOne, token('?')), + value(Quantifier::ZeroOrMore, token('*')), + value(Quantifier::OneOrMore, token('+')), + )) + .parse(i) +} + +fn raw_identifier(i: &str) -> IResult<&str, String, VerboseError<&str>> { + let identifier_head = satisfy(|c| c.is_alphabetic()); + let is_identifier_tail = |c: char| c == '_' || c.is_alphanumeric(); + recognize(alt(( + // single underscore is the anonymous syntax item, + // so we don't allow it as an identifier + char('_').and(take_while1(is_identifier_tail)), + identifier_head.and(take_while(is_identifier_tail)), + ))) + .map(|s: &str| s.to_string()) + .parse(i) +} + +fn binding_name_token(i: &str) -> IResult<&str, String, VerboseError<&str>> { + terminated(preceded(char('@'), raw_identifier), multispace0).parse(i) +} + +fn kind_token(i: &str) -> IResult<&str, Kind, VerboseError<&str>> { + terminated(raw_identifier, multispace0) + .map(|id| { + TokenKind::try_from(id.as_str()) + .map(Kind::Token) + .or_else(|_| RuleKind::try_from(id.as_str()).map(Kind::Rule)) + .unwrap() // TODO + }) + .parse(i) +} + +fn field_name_token(i: &str) -> IResult<&str, FieldName, VerboseError<&str>> { + terminated(raw_identifier, token(':')) + .map(|id| FieldName::try_from(id.as_str()).unwrap()) + .parse(i) +} + +fn text_token(i: &str) -> IResult<&str, String, VerboseError<&str>> { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + enum Fragment<'a> { + EscapedChar(char), + SwallowedWhitespace, + UnescapedSequence(&'a str), + } + + let escaped_char = preceded( + char('\\'), + alt(( + map_opt( + map_res( + preceded( + char('u'), + delimited( + char('{'), + // 1 to 6 hex digits + take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()), + char('}'), + 
/// Builds a parser for the single punctuation character `c` that also consumes any
/// whitespace following it (`multispace0`), yielding the matched character.
fn token<'input>(c: char) -> impl Parser<&'input str, char, VerboseError<&'input str>> {
    terminated(char(c), multispace0)
}
// https://github.com/rust-lang/cargo/issues/1982 mod supress_api_dependencies { - use {ariadne as _, serde as _, strum as _, strum_macros as _, thiserror as _}; + use {ariadne as _, nom as _, serde as _, strum as _, strum_macros as _, thiserror as _}; } #[derive(ClapParser, Debug)] diff --git a/crates/solidity/outputs/npm/crate/Cargo.toml b/crates/solidity/outputs/npm/crate/Cargo.toml index 23645a4fa4..2ad579a655 100644 --- a/crates/solidity/outputs/npm/crate/Cargo.toml +++ b/crates/solidity/outputs/npm/crate/Cargo.toml @@ -27,6 +27,7 @@ napi-build = { workspace = true } ariadne = { workspace = true } napi = { workspace = true } napi-derive = { workspace = true } +nom = { workspace = true } semver = { workspace = true } serde = { workspace = true } slang_solidity = { workspace = true }