-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fill in missing CST node names (#518)
Adds validation to make sure that: - Productions cannot reference themselves, except operators. - Required productions cannot be inlined. - Parsers cannot reference inlined scanners. - Scanners can only reference inlined scanners. - Parser names are unique, except for operator definitions and parsers defined in different versions. - Parsers are not uselessly nested when the nesting doesn't change the output. - Keywords are always referenced as productions instead of terminals everywhere in the grammar, to guarantee that the correct version checks run on them. Closes #353
- Loading branch information
1 parent
8bd5446
commit b3b562b
Showing
591 changed files
with
17,973 additions
and
13,931 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"changelog": minor | ||
--- | ||
|
||
fill in missing CST node names |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
171 changes: 171 additions & 0 deletions
171
crates/codegen/schema/src/validation/rules/definitions/keywords/collector.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
use std::collections::HashMap; | ||
|
||
use crate::{ | ||
types::{LanguageDefinitionRef, ProductionRef, ScannerDefinition, ScannerRef}, | ||
validation::visitors::{run_visitor, LocationRef, Reporter, Visitor}, | ||
}; | ||
|
||
pub struct KeywordsCollector { | ||
keywords: HashMap<String, ProductionRef>, | ||
current_production: Option<ProductionRef>, | ||
} | ||
|
||
impl KeywordsCollector { | ||
pub fn collect( | ||
language: &LanguageDefinitionRef, | ||
reporter: &mut Reporter, | ||
) -> HashMap<String, ProductionRef> { | ||
let mut instance = Self { | ||
keywords: HashMap::new(), | ||
current_production: None, | ||
}; | ||
|
||
run_visitor(&mut instance, language, reporter); | ||
|
||
return instance.keywords; | ||
} | ||
} | ||
|
||
impl Visitor for KeywordsCollector { | ||
fn visit_production( | ||
&mut self, | ||
production: &crate::types::ProductionRef, | ||
_location: &LocationRef, | ||
_reporter: &mut Reporter, | ||
) -> bool { | ||
self.current_production = Some(production.to_owned()); | ||
return true; | ||
} | ||
|
||
fn visit_parser( | ||
&mut self, | ||
_parser: &crate::types::ParserRef, | ||
_location: &LocationRef, | ||
_reporter: &mut Reporter, | ||
) -> bool { | ||
return false; | ||
} | ||
|
||
fn visit_scanner( | ||
&mut self, | ||
scanner: &ScannerRef, | ||
location: &LocationRef, | ||
reporter: &mut Reporter, | ||
) -> bool { | ||
let identifier = | ||
if let ScannerDefinition::TrailingContext { expression, .. } = &scanner.definition { | ||
expression | ||
} else { | ||
return true; | ||
}; | ||
|
||
let variations = if let Some(variations) = Self::collect_variations(identifier) { | ||
variations | ||
} else { | ||
return false; | ||
}; | ||
|
||
let current_production = self.current_production.as_ref().unwrap(); | ||
|
||
for variation in &variations { | ||
match self.keywords.get(variation) { | ||
Some(existing_production) => { | ||
reporter.report( | ||
location, | ||
Errors::KeywordAlreadyDefined( | ||
variation.to_owned(), | ||
existing_production.name.to_owned(), | ||
), | ||
); | ||
} | ||
None => { | ||
self.keywords | ||
.insert(variation.to_owned(), current_production.to_owned()); | ||
} | ||
}; | ||
} | ||
|
||
return false; | ||
} | ||
} | ||
|
||
impl KeywordsCollector { | ||
fn collect_variations(scanner: &ScannerRef) -> Option<Vec<String>> { | ||
// TODO: manually calculate variations until keywords are implemented. | ||
// Once that is done, report error if any scanners have variations, as they should be keywords. | ||
// https://github.com/NomicFoundation/slang/issues/505 | ||
|
||
match &scanner.definition { | ||
ScannerDefinition::Choice(choices) => { | ||
let mut variations = Vec::new(); | ||
for choice in choices { | ||
variations.extend(Self::collect_variations(&choice)?); | ||
} | ||
|
||
return Some(variations); | ||
} | ||
ScannerDefinition::Difference { minuend, .. } => { | ||
return Self::collect_variations(minuend); | ||
} | ||
ScannerDefinition::Optional(child) => { | ||
let mut variations = Self::collect_variations(child)?; | ||
variations.push("".to_owned()); | ||
return Some(variations); | ||
} | ||
ScannerDefinition::Range { from, to } => { | ||
let mut variations = Vec::new(); | ||
for i in *from..=*to { | ||
variations.push(i.to_string()); | ||
} | ||
|
||
return Some(variations); | ||
} | ||
ScannerDefinition::Sequence(children) => { | ||
let mut existing_variations = vec![]; | ||
|
||
for child in children { | ||
let new_variations = Self::collect_variations(child)?; | ||
|
||
existing_variations = if existing_variations.is_empty() { | ||
new_variations | ||
} else { | ||
let mut combined = vec![]; | ||
|
||
for existing in &existing_variations { | ||
for new in &new_variations { | ||
combined.push(format!("{existing}{new}")); | ||
} | ||
} | ||
|
||
combined | ||
} | ||
} | ||
|
||
return Some(existing_variations); | ||
} | ||
ScannerDefinition::TrailingContext { expression, .. } => { | ||
return Self::collect_variations(expression); | ||
} | ||
ScannerDefinition::Terminal(terminal) => { | ||
if terminal.chars().all(|c| c == '_' || c.is_alphanumeric()) { | ||
return Some(vec![terminal.to_owned()]); | ||
} else { | ||
return None; | ||
} | ||
} | ||
ScannerDefinition::Not(_) | ||
| ScannerDefinition::OneOrMore(_) | ||
| ScannerDefinition::Reference(_) | ||
| ScannerDefinition::ZeroOrMore(_) => { | ||
// Cannot be a keyword | ||
return None; | ||
} | ||
}; | ||
} | ||
} | ||
|
||
#[derive(thiserror::Error, Debug)] | ||
pub(crate) enum Errors { | ||
#[error("Keyword '{0}' is already defined in production '{1}'.")] | ||
KeywordAlreadyDefined(String, String), | ||
} |
22 changes: 22 additions & 0 deletions
22
crates/codegen/schema/src/validation/rules/definitions/keywords/mod.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
mod collector; | ||
mod validator; | ||
|
||
use crate::{ | ||
types::LanguageDefinitionRef, | ||
validation::{ | ||
rules::definitions::keywords::{ | ||
collector::KeywordsCollector, validator::KeywordsValidator, | ||
}, | ||
visitors::Reporter, | ||
}, | ||
}; | ||
|
||
pub struct Keywords; | ||
|
||
impl Keywords { | ||
pub fn validate(language: &LanguageDefinitionRef, reporter: &mut Reporter) { | ||
let keywords = KeywordsCollector::collect(language, reporter); | ||
|
||
KeywordsValidator::validate(language, keywords, reporter); | ||
} | ||
} |
87 changes: 87 additions & 0 deletions
87
crates/codegen/schema/src/validation/rules/definitions/keywords/validator.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
use std::collections::HashMap; | ||
|
||
use crate::{ | ||
types::{LanguageDefinitionRef, ProductionRef, ScannerDefinition, ScannerRef}, | ||
validation::{ | ||
rules::utils::is_a_keyword_scanner, | ||
visitors::{run_visitor, LocationRef, Reporter, Visitor}, | ||
}, | ||
}; | ||
|
||
pub struct KeywordsValidator { | ||
keywords: HashMap<String, ProductionRef>, | ||
current_production: Option<ProductionRef>, | ||
} | ||
|
||
impl KeywordsValidator { | ||
pub fn validate( | ||
language: &LanguageDefinitionRef, | ||
keywords: HashMap<String, ProductionRef>, | ||
reporter: &mut Reporter, | ||
) { | ||
let mut instance = Self { | ||
keywords, | ||
current_production: None, | ||
}; | ||
|
||
run_visitor(&mut instance, language, reporter); | ||
} | ||
} | ||
|
||
impl Visitor for KeywordsValidator { | ||
fn visit_production( | ||
&mut self, | ||
production: &crate::types::ProductionRef, | ||
_location: &LocationRef, | ||
_reporter: &mut Reporter, | ||
) -> bool { | ||
self.current_production = Some(production.to_owned()); | ||
|
||
// Skip validation if this is a keyword: | ||
return !is_a_keyword_scanner(&production.name); | ||
} | ||
|
||
fn visit_parser( | ||
&mut self, | ||
_parser: &crate::types::ParserRef, | ||
_location: &LocationRef, | ||
_reporter: &mut Reporter, | ||
) -> bool { | ||
return false; | ||
} | ||
|
||
fn visit_scanner( | ||
&mut self, | ||
scanner: &ScannerRef, | ||
location: &LocationRef, | ||
reporter: &mut Reporter, | ||
) -> bool { | ||
if let ScannerDefinition::Terminal(terminal) = &scanner.definition { | ||
if let Some(production) = self.keywords.get(terminal) { | ||
if production.name != self.current_production.as_ref().unwrap().name { | ||
reporter.report( | ||
location, | ||
Errors::ShouldReferenceExistingKeyword(production.name.to_owned()), | ||
); | ||
} | ||
} | ||
} | ||
|
||
return true; | ||
} | ||
|
||
fn visit_precedence_parser( | ||
&mut self, | ||
_precedence_parser: &crate::types::PrecedenceParserRef, | ||
_location: &LocationRef, | ||
_reporter: &mut Reporter, | ||
) -> bool { | ||
return false; | ||
} | ||
} | ||
|
||
#[derive(thiserror::Error, Debug)] | ||
pub(crate) enum Errors { | ||
#[error("You should reference the existing keyword '{0}' instead.")] | ||
ShouldReferenceExistingKeyword(String), | ||
} |
Oops, something went wrong.