Skip to content

Commit

Permalink
fill in missing CST node names (#518)
Browse files Browse the repository at this point in the history
Adds validation to make sure that:

- Productions cannot reference themselves, except operators.
- Required productions cannot be inlined.
- Parsers cannot reference inlined scanners.
- Scanners can only reference inlined scanners.
- Parser names should be unique, except operator definitions, and
parsers defined in different versions.
- Prevent useless nesting of parsers if they don't change the output.
- Make sure keywords are always referenced as productions instead of
terminals everywhere in the grammar, to guarantee that the correct version
checks run on them.

Closes #353
  • Loading branch information
OmarTawfik committed Jul 7, 2023
1 parent 8bd5446 commit b3b562b
Show file tree
Hide file tree
Showing 591 changed files with 17,973 additions and 13,931 deletions.
5 changes: 5 additions & 0 deletions .changeset/plenty-chefs-report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"changelog": minor
---

fill in missing CST node names
3 changes: 1 addition & 2 deletions crates/codegen/schema/src/validation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@ use crate::types::LanguageDefinitionRef;
/// Validates `language` by calling each rule module's `run` in the order listed below.
///
/// Every call is followed by `?`, so the first rule that returns an error ends
/// validation and that error is propagated to the caller. `Ok(())` is returned
/// only when all of the listed rules succeed.
///
/// NOTE(review): this text was extracted from a diff hunk ("1 addition & 2
/// deletions") whose +/- markers were lost, so some of the calls below are
/// presumably removed lines rather than current code — confirm against the repository.
pub fn validate_language(language: &LanguageDefinitionRef) -> CodegenResult<()> {
// Validation should stop at each step if there are any errors:

rules::language_versions::run(language)?;
rules::definitions::run(language)?;
rules::references::run(language)?;
rules::empty_roots::run(language)?;
rules::lints::run(language)?;

return Ok(());
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
use std::collections::HashMap;

use crate::{
types::{LanguageDefinitionRef, ProductionRef, ScannerDefinition, ScannerRef},
validation::visitors::{run_visitor, LocationRef, Reporter, Visitor},
};

/// Visitor state used to gather keyword spellings and the production that
/// first defined each of them.
pub struct KeywordsCollector {
    /// Keyword text mapped to the production it was first seen in.
    keywords: HashMap<String, ProductionRef>,
    /// The production most recently passed to `visit_production`, if any.
    current_production: Option<ProductionRef>,
}

impl KeywordsCollector {
    /// Runs a fresh collector over `language` through `run_visitor` and hands
    /// back the keyword map it accumulated.
    ///
    /// Any problems found along the way are sent to `reporter`; the returned
    /// map is whatever was collected regardless.
    pub fn collect(
        language: &LanguageDefinitionRef,
        reporter: &mut Reporter,
    ) -> HashMap<String, ProductionRef> {
        let mut collector = Self {
            keywords: HashMap::new(),
            current_production: None,
        };
        run_visitor(&mut collector, language, reporter);

        collector.keywords
    }
}

impl Visitor for KeywordsCollector {
fn visit_production(
&mut self,
production: &crate::types::ProductionRef,
_location: &LocationRef,
_reporter: &mut Reporter,
) -> bool {
self.current_production = Some(production.to_owned());
return true;
}

fn visit_parser(
&mut self,
_parser: &crate::types::ParserRef,
_location: &LocationRef,
_reporter: &mut Reporter,
) -> bool {
return false;
}

fn visit_scanner(
&mut self,
scanner: &ScannerRef,
location: &LocationRef,
reporter: &mut Reporter,
) -> bool {
let identifier =
if let ScannerDefinition::TrailingContext { expression, .. } = &scanner.definition {
expression
} else {
return true;
};

let variations = if let Some(variations) = Self::collect_variations(identifier) {
variations
} else {
return false;
};

let current_production = self.current_production.as_ref().unwrap();

for variation in &variations {
match self.keywords.get(variation) {
Some(existing_production) => {
reporter.report(
location,
Errors::KeywordAlreadyDefined(
variation.to_owned(),
existing_production.name.to_owned(),
),
);
}
None => {
self.keywords
.insert(variation.to_owned(), current_production.to_owned());
}
};
}

return false;
}
}

impl KeywordsCollector {
    /// Expands `scanner` into every literal string it can produce, or `None`
    /// when the scanner contains a construct that cannot be part of a keyword.
    ///
    /// - `Choice`: concatenated lists of every alternative.
    /// - `Difference`: the variations of the minuend only.
    /// - `Optional`: the child's variations plus the empty string.
    /// - `Range`: one single-item string per value in `from..=to`.
    /// - `Sequence`: every prefix/suffix combination of the children, in order.
    /// - `TrailingContext`: the variations of the inner expression.
    /// - `Terminal`: itself, if made only of `_` and alphanumeric characters.
    /// - `Not`, `OneOrMore`, `Reference`, `ZeroOrMore`: `None`.
    fn collect_variations(scanner: &ScannerRef) -> Option<Vec<String>> {
        // TODO: manually calculate variations until keywords are implemented.
        // Once that is done, report error if any scanners have variations, as they should be keywords.
        // https://github.com/NomicFoundation/slang/issues/505

        match &scanner.definition {
            ScannerDefinition::Choice(choices) => {
                // Bail out with `None` as soon as one alternative is not expandable.
                choices.iter().try_fold(Vec::new(), |mut gathered, choice| {
                    gathered.extend(Self::collect_variations(choice)?);
                    Some(gathered)
                })
            }
            ScannerDefinition::Difference { minuend, .. } => Self::collect_variations(minuend),
            ScannerDefinition::Optional(child) => {
                Self::collect_variations(child).map(|mut spellings| {
                    spellings.push(String::new());
                    spellings
                })
            }
            ScannerDefinition::Range { from, to } => {
                Some((*from..=*to).map(|item| item.to_string()).collect())
            }
            ScannerDefinition::Sequence(children) => {
                let mut prefixes: Vec<String> = Vec::new();

                for child in children {
                    let suffixes = Self::collect_variations(child)?;

                    // An empty accumulator is treated as "nothing collected
                    // yet": it is replaced by the child's list, not combined.
                    if prefixes.is_empty() {
                        prefixes = suffixes;
                        continue;
                    }

                    prefixes = prefixes
                        .iter()
                        .flat_map(|prefix| {
                            suffixes
                                .iter()
                                .map(move |suffix| format!("{prefix}{suffix}"))
                        })
                        .collect();
                }

                Some(prefixes)
            }
            ScannerDefinition::TrailingContext { expression, .. } => {
                Self::collect_variations(expression)
            }
            ScannerDefinition::Terminal(terminal) => {
                let is_word = terminal.chars().all(|c| c == '_' || c.is_alphanumeric());
                is_word.then(|| vec![terminal.to_owned()])
            }
            // Cannot be a keyword
            ScannerDefinition::Not(_)
            | ScannerDefinition::OneOrMore(_)
            | ScannerDefinition::Reference(_)
            | ScannerDefinition::ZeroOrMore(_) => None,
        }
    }
}

/// Errors reported by the keywords collector in this module.
#[derive(thiserror::Error, Debug)]
pub(crate) enum Errors {
/// A keyword spelling (`{0}`) was found again after it had already been
/// registered; `{1}` is the name of the production that registered it first.
#[error("Keyword '{0}' is already defined in production '{1}'.")]
KeywordAlreadyDefined(String, String),
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
mod collector;
mod validator;

use crate::{
types::LanguageDefinitionRef,
validation::{
rules::definitions::keywords::{
collector::KeywordsCollector, validator::KeywordsValidator,
},
visitors::Reporter,
},
};

/// Entry point for the keyword validation rule: collects keywords, then
/// validates the language against them.
pub struct Keywords;

impl Keywords {
    /// Gathers every keyword in `language` with `KeywordsCollector`, then
    /// passes that map to `KeywordsValidator`. Both phases report through
    /// the same `reporter`.
    pub fn validate(language: &LanguageDefinitionRef, reporter: &mut Reporter) {
        let known_keywords = KeywordsCollector::collect(language, reporter);
        KeywordsValidator::validate(language, known_keywords, reporter);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
use std::collections::HashMap;

use crate::{
types::{LanguageDefinitionRef, ProductionRef, ScannerDefinition, ScannerRef},
validation::{
rules::utils::is_a_keyword_scanner,
visitors::{run_visitor, LocationRef, Reporter, Visitor},
},
};

/// Visitor state used to check terminals against an already-collected
/// keyword map.
pub struct KeywordsValidator {
    /// Keyword text mapped to the production that defines it.
    keywords: HashMap<String, ProductionRef>,
    /// The production most recently passed to `visit_production`, if any.
    current_production: Option<ProductionRef>,
}

impl KeywordsValidator {
    /// Walks `language` through `run_visitor` with a validator built around
    /// `keywords`; findings are sent to `reporter`.
    pub fn validate(
        language: &LanguageDefinitionRef,
        keywords: HashMap<String, ProductionRef>,
        reporter: &mut Reporter,
    ) {
        let mut validator = Self {
            keywords,
            current_production: None,
        };

        run_visitor(&mut validator, language, reporter);
    }
}

impl Visitor for KeywordsValidator {
    /// Records `production` as the current one. Returns `false` for
    /// productions that `is_a_keyword_scanner` recognises by name, so that
    /// keyword definitions themselves are skipped, and `true` otherwise.
    fn visit_production(
        &mut self,
        production: &crate::types::ProductionRef,
        _location: &LocationRef,
        _reporter: &mut Reporter,
    ) -> bool {
        self.current_production = Some(production.clone());

        // Skip validation if this is a keyword:
        let is_keyword = is_a_keyword_scanner(&production.name);
        !is_keyword
    }

    /// Parsers are not inspected by this validator; always returns `false`.
    fn visit_parser(
        &mut self,
        _parser: &crate::types::ParserRef,
        _location: &LocationRef,
        _reporter: &mut Reporter,
    ) -> bool {
        false
    }

    /// Reports `ShouldReferenceExistingKeyword` when `scanner` is a terminal
    /// whose text is a known keyword defined by a production other than the
    /// current one. Always returns `true`.
    fn visit_scanner(
        &mut self,
        scanner: &ScannerRef,
        location: &LocationRef,
        reporter: &mut Reporter,
    ) -> bool {
        let terminal = match &scanner.definition {
            ScannerDefinition::Terminal(terminal) => terminal,
            _ => return true,
        };

        let defining_production = match self.keywords.get(terminal) {
            Some(defining_production) => defining_production,
            None => return true,
        };

        // Only looked up after a keyword hit; a production is expected to
        // have been visited before any of its scanners.
        let current = self.current_production.as_ref().unwrap();

        if defining_production.name != current.name {
            let error =
                Errors::ShouldReferenceExistingKeyword(defining_production.name.to_owned());
            reporter.report(location, error);
        }

        true
    }

    /// Precedence parsers are not inspected by this validator; always
    /// returns `false`.
    fn visit_precedence_parser(
        &mut self,
        _precedence_parser: &crate::types::PrecedenceParserRef,
        _location: &LocationRef,
        _reporter: &mut Reporter,
    ) -> bool {
        false
    }
}

/// Errors reported by the keywords validator in this module.
#[derive(thiserror::Error, Debug)]
pub(crate) enum Errors {
/// A terminal spells out a known keyword inline; `{0}` is the name of the
/// production that defines that keyword.
#[error("You should reference the existing keyword '{0}' instead.")]
ShouldReferenceExistingKeyword(String),
}
Loading

0 comments on commit b3b562b

Please sign in to comment.