Skip to content

Commit

Permalink
Collect syntax kinds directly from DSL v2 and isolate parser genera…
Browse files Browse the repository at this point in the history
…tion logic (NomicFoundation#991)

Work towards NomicFoundation#638 

Mainly:
- All `quote`ing and any other (old) parser codegen is now bundled in the
new `generator/src/parser/codegen` module
- To expand `kinds.rs.jinja2`, we use a new, dedicated `KindsModel`; this
disentangles the kinds from the parser model and limits workarounds such as
removing the built-in labels only after visiting the grammar
- simplified collecting the `ParserModel.referenced_versions` (it's de
facto `Language::collect_breaking_changes` but with a caveat; I'll
submit a separate PR for that since it's not 100% obvious and would
change how we define `ParserModel`).
- and some minor fixes like `pub(super)` visibility or updated
referenced issues.

Now, the remaining work will be to actually revamp how we visit the
resulting grammar.
  • Loading branch information
Xanewok authored Jun 4, 2024
1 parent 856e34d commit 12e0c18
Show file tree
Hide file tree
Showing 17 changed files with 216 additions and 199 deletions.
20 changes: 20 additions & 0 deletions crates/codegen/language/definition/src/model/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,24 @@ impl Item {
Item::Fragment { item } => &item.name,
}
}

/// Returns `true` if this language item corresponds to a dedicated terminal kind.
///
/// Trivia, keywords and tokens qualify; every other item does not.
pub fn is_terminal(&self) -> bool {
    match self {
        Item::Trivia { .. } | Item::Keyword { .. } | Item::Token { .. } => true,
        // NOTE: `Item::Fragment` is inlined, hence it is listed with the non-terminal items.
        Item::Struct { .. }
        | Item::Enum { .. }
        | Item::Repeated { .. }
        | Item::Separated { .. }
        | Item::Precedence { .. }
        | Item::Fragment { .. } => false,
    }
}

/// Returns `true` if this language item is a struct, enum, repeated, separated
/// or precedence item.
pub fn is_nonterminal(&self) -> bool {
    match self {
        Item::Struct { .. }
        | Item::Enum { .. }
        | Item::Repeated { .. }
        | Item::Separated { .. }
        | Item::Precedence { .. } => true,
        Item::Trivia { .. }
        | Item::Keyword { .. }
        | Item::Token { .. }
        | Item::Fragment { .. } => false,
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,16 @@ impl VersionSpecifier {
VersionSpecifier::Range { from, till } => from <= version && version < till,
}
}

/// Yields the versions this specifier names as its bounds: the lower bound
/// (`from`) first, then the upper bound (`till`), skipping whichever is absent.
pub fn versions(&self) -> impl Iterator<Item = &Version> {
    let (lower, upper) = match self {
        VersionSpecifier::Never => (None, None),
        VersionSpecifier::From { from } => (Some(from), None),
        VersionSpecifier::Till { till } => (None, Some(till)),
        VersionSpecifier::Range { from, till } => (Some(from), Some(till)),
    };

    // An `Option` iterates over zero or one element, so chaining the two
    // bounds produces exactly the bounds that are present, in order.
    lower.into_iter().chain(upper)
}
}
12 changes: 6 additions & 6 deletions crates/codegen/runtime/cargo/src/runtime/kinds.rs.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub enum NonterminalKind {
Stub2,
Stub3,
{%- else -%}
{%- for variant in model.parser.nonterminal_kinds -%}
{%- for variant in model.kinds.nonterminal_kinds -%}
{# variant.documentation | indent(prefix = "/// ", first = true, blank = true) #}
{{ variant }},
{%- endfor -%}
Expand Down Expand Up @@ -65,7 +65,7 @@ pub enum EdgeLabel {
Stub2,
Stub3,
{%- else -%}
{% for variant in model.parser.labels -%}
{% for variant in model.kinds.labels -%}
{{ variant | pascal_case }},
{%- endfor -%}
{%- endif -%}
Expand Down Expand Up @@ -98,7 +98,7 @@ pub enum TerminalKind {
Stub2,
Stub3,
{%- else -%}
{%- for variant in model.parser.terminal_kinds -%}
{%- for variant in model.kinds.terminal_kinds -%}
{# variant.documentation | indent(prefix = "/// ", first = true, blank = true) #}
{{ variant }},
{%- endfor -%}
Expand All @@ -112,7 +112,7 @@ impl metaslang_cst::TerminalKind for TerminalKind {
{%- else -%}
matches!(
self,
{%- for variant in model.parser.trivia_scanner_names -%}
{%- for variant in model.kinds.trivia_scanner_names -%}
| Self::{{ variant }}
{%- endfor -%}
)
Expand All @@ -128,7 +128,7 @@ pub(crate) enum LexicalContext {
Stub2,
Stub3,
{%- else -%}
{%- for context_name, _ in model.parser.scanner_contexts %}
{%- for context_name in model.kinds.lexical_contexts %}
{{ context_name }},
{%- endfor %}
{%- endif -%}
Expand All @@ -143,7 +143,7 @@ pub(crate) trait IsLexicalContext {
#[allow(non_snake_case)]
pub(crate) mod LexicalContextType {
{%- if not rendering_in_stubs -%}
{%- for context_name, _ in model.parser.scanner_contexts %}
{%- for context_name in model.kinds.lexical_contexts %}
pub struct {{ context_name }};

impl super::IsLexicalContext for {{ context_name }} {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ impl Lexer for Language {
if kw_scan == KeywordScan::Absent {
input.set_position(save);

// TODO(#638): Don't allocate a string here
// TODO(#1001): Don't allocate a string here
let ident_value = input.content(save.utf8..furthest_position.utf8);

for keyword_compound_scanner in [
Expand Down
98 changes: 98 additions & 0 deletions crates/codegen/runtime/generator/src/kinds.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use std::collections::BTreeSet;

use codegen_language_definition::model::{self, Identifier, Item};
use serde::Serialize;

/// The syntax kinds collected from the language definition.
///
/// Serialized as the `kinds` field of the runtime model and read by the
/// `kinds.rs.jinja2` template (as `model.kinds.*`).
#[derive(Default, Serialize)]
pub struct KindsModel {
/// Defines the `NonterminalKind` enum variants.
nonterminal_kinds: BTreeSet<Identifier>,
/// Defines the `TerminalKind` enum variants.
terminal_kinds: BTreeSet<Identifier>,
/// Defines `TerminalKind::is_trivia` method.
trivia_scanner_names: BTreeSet<Identifier>,
/// Defines `EdgeLabel` enum variants.
labels: BTreeSet<Identifier>,
/// Defines the `LexicalContext(Type)` enum and type-level variants.
lexical_contexts: BTreeSet<Identifier>,
}

impl KindsModel {
pub fn create(language: &model::Language) -> Self {
let terminal_kinds = language
.items()
.filter(|item| item.is_terminal() && !matches!(item, Item::Fragment { .. }))
.map(|item| item.name().clone())
.collect();

let mut nonterminal_kinds = BTreeSet::default();
for item in language.items() {
match item {
Item::Struct { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Enum { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Repeated { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Separated { item } => {
nonterminal_kinds.insert(item.name.clone());
}
Item::Precedence { item } => {
nonterminal_kinds.insert(item.name.clone());
for op in &item.precedence_expressions {
nonterminal_kinds.insert(op.name.clone());
}
}
// Terminals
_ => {}
}
}

let trivia_scanner_names = language
.items()
.filter_map(|item| match item {
Item::Trivia { item } => Some(item.name.clone()),
_ => None,
})
.collect();

let mut labels = BTreeSet::default();
for item in language.items() {
match item {
Item::Struct { item } => {
for field_name in item.fields.keys() {
labels.insert(field_name.clone());
}
}
Item::Precedence { item } => {
for item in &item.precedence_expressions {
for item in &item.operators {
for field_name in item.fields.keys() {
labels.insert(field_name.clone());
}
}
}
}
_ => {}
}
}

let lexical_contexts: BTreeSet<_> = language
.topics()
.filter_map(|t| t.lexical_context.as_ref())
.cloned()
.chain(std::iter::once(Identifier::from("Default")))
.collect();

KindsModel {
nonterminal_kinds,
terminal_kinds,
trivia_scanner_names,
labels,
lexical_contexts,
}
}
}
1 change: 1 addition & 0 deletions crates/codegen/runtime/generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use serde::Serialize;
use crate::model::RuntimeModel;

mod ast;
mod kinds;
mod model;
mod parser;

Expand Down
3 changes: 3 additions & 0 deletions crates/codegen/runtime/generator/src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use semver::Version;
use serde::Serialize;

use crate::ast::AstModel;
use crate::kinds::KindsModel;
use crate::parser::ParserModel;

#[derive(Default, Serialize)]
Expand All @@ -14,6 +15,7 @@ pub struct RuntimeModel {
all_versions: BTreeSet<Version>,
parser: ParserModel,
ast: AstModel,
kinds: KindsModel,
}

impl RuntimeModel {
Expand All @@ -22,6 +24,7 @@ impl RuntimeModel {
all_versions: language.versions.iter().cloned().collect(),
ast: AstModel::create(language),
parser: ParserModel::from_language(language),
kinds: KindsModel::create(language),
}
}
}
Loading

0 comments on commit 12e0c18

Please sign in to comment.