diff --git a/Cargo.toml b/Cargo.toml
index c119301..6f046ac 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,6 @@ harness = false
 name = "all-packages"
 
 [dependencies]
-cbitset = "0.2.0"
 rowan = "0.12.5"
 smol_str = "0.1.17"
 
diff --git a/examples/list-fns.rs b/examples/list-fns.rs
index 3d20513..934a15f 100644
--- a/examples/list-fns.rs
+++ b/examples/list-fns.rs
@@ -1,7 +1,7 @@
 use std::{env, error::Error, fs};
 
-use smol_str::SmolStr;
 use rnix::{types::*, NodeOrToken, SyntaxKind::*, SyntaxNode};
+use smol_str::SmolStr;
 
 fn main() -> Result<(), Box<dyn Error>> {
     let file = match env::args().skip(1).next() {
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 0000000..a092511
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,3 @@
+target
+corpus
+artifacts
diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock
new file mode 100644
index 0000000..17913f0
--- /dev/null
+++ b/fuzz/Cargo.lock
@@ -0,0 +1,106 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "arbitrary"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "577b08a4acd7b99869f863c50011b01eb73424ccc798ecd996f2e24817adfca7"
+
+[[package]]
+name = "autocfg"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
+
+[[package]]
+name = "cc"
+version = "1.0.70"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0"
+
+[[package]]
+name = "countme"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "328b822bdcba4d4e402be8d9adb6eebf269f969f8eadef977a553ff3c4fbcb58"
+
+[[package]]
+name = "hashbrown"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36a9a84a6e8b55dfefb04235e55edb2b9a2a18488fcae777a6bdaa6f06f1deb3"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56"
+
+[[package]]
+name = "rnix"
+version = "0.9.1"
+dependencies = [
+ "rowan",
+ "smol_str",
+]
+
+[[package]]
+name = "rnix-fuzz"
+version = "0.0.0"
+dependencies = [
+ "libfuzzer-sys",
+ "rnix",
+]
+
+[[package]]
+name = "rowan"
+version = "0.12.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1b36e449f3702f3b0c821411db1cbdf30fb451726a9456dce5dabcd44420043"
+dependencies = [
+ "countme",
+ "hashbrown",
+ "memoffset",
+ "rustc-hash",
+ "text-size",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "smol_str"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b203e79e90905594272c1c97c7af701533d42adaab0beb3859018e477d54a3b0"
+
+[[package]]
+name = "text-size"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "288cb548dbe72b652243ea797201f3d481a0609a967980fcc5b2315ea811560a"
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
new file mode 100644
index 0000000..f78cfcb
--- /dev/null
+++ b/fuzz/Cargo.toml
@@ -0,0 +1,31 @@
+[package]
+name = "rnix-fuzz"
+version = "0.0.0"
+authors = ["Automatically generated"]
+publish = false
+edition = "2018"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.4"
+
+[dependencies.rnix]
+path = ".."
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "parser"
+path = "fuzz_targets/parser.rs"
+test = false
+doc = false
+
+[[bin]]
+name = "lexer"
+path = "fuzz_targets/lexer.rs"
+test = false
+doc = false
\ No newline at end of file
diff --git a/fuzz/fuzz_targets/lexer.rs b/fuzz/fuzz_targets/lexer.rs
new file mode 100644
index 0000000..66fffba
--- /dev/null
+++ b/fuzz/fuzz_targets/lexer.rs
@@ -0,0 +1,8 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+
+fuzz_target!(|data: &[u8]| {
+    if let Ok(text) = std::str::from_utf8(data) {
+        let _ = rnix::tokenizer::tokenize(text);
+    }
+});
diff --git a/fuzz/fuzz_targets/parser.rs b/fuzz/fuzz_targets/parser.rs
new file mode 100644
index 0000000..440e810
--- /dev/null
+++ b/fuzz/fuzz_targets/parser.rs
@@ -0,0 +1,14 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+
+use std::io::{self, Write};
+
+fuzz_target!(|data: &[u8]| {
+    let stdout = io::stdout();
+    let mut handle = stdout.lock();
+
+    if let Ok(text) = std::str::from_utf8(data) {
+        writeln!(handle, "Fuzzing {:?}\n\n", data).unwrap();
+        let _ = rnix::parse(text);
+    }
+});
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 9811cb2..13c18bc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,6 +2,7 @@
 mod macros;
 mod kinds;
 pub mod parser;
+mod token_set;
 pub mod tokenizer;
 pub mod types;
 pub mod value;
@@ -9,12 +10,13 @@ pub mod value;
 pub use self::{
     kinds::SyntaxKind,
     parser::AST,
+    token_set::TokenSet,
     value::{StrPart, Value as NixValue},
 };
 pub use rowan::{
-    NodeOrToken, SyntaxElementChildren, SyntaxNodeChildren, TextRange, TextSize,
-    TokenAtOffset, WalkEvent,
+    NodeOrToken, SyntaxElementChildren, SyntaxNodeChildren, TextRange, TextSize, TokenAtOffset,
+    WalkEvent,
 };
 
 use self::tokenizer::Tokenizer;
diff --git a/src/parser.rs b/src/parser.rs
index 6dbd6e1..5238869 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -5,16 +5,14 @@ use std::{
     fmt,
 };
 
-use cbitset::BitSet256;
-use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, Language, TextRange, TextSize};
-use smol_str::SmolStr;
-
 use crate::{
     types::{Root, TypedNode},
     NixLanguage,
     SyntaxKind::{self, *},
-    SyntaxNode,
+    SyntaxNode, TokenSet,
 };
+use rowan::{Checkpoint, GreenNode, GreenNodeBuilder, Language, TextRange, TextSize};
+use smol_str::SmolStr;
 
 const OR: &'static str = "or";
 
@@ -22,6 +20,7 @@ const OR: &'static str = "or";
 #[derive(Clone, Debug, PartialEq)]
 #[non_exhaustive]
 pub enum ParseError {
+    Message(String, TextRange),
     /// Unexpected is used when the cause cannot be specified further
     Unexpected(TextRange),
     /// UnexpectedExtra is used when there are additional tokens to the root in the tree
@@ -39,11 +38,24 @@ pub enum ParseError {
 impl fmt::Display for ParseError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
+            ParseError::Message(msg, range) => {
+                write!(f, "{} at {}..{}", msg, usize::from(range.start()), usize::from(range.end()))
+            }
             ParseError::Unexpected(range) => {
-                write!(f, "error node at {}..{}", usize::from(range.start()), usize::from(range.end()))
+                write!(
+                    f,
+                    "error node at {}..{}",
+                    usize::from(range.start()),
+                    usize::from(range.end())
+                )
             }
             ParseError::UnexpectedExtra(range) => {
-                write!(f, "unexpected token at {}..{}", usize::from(range.start()), usize::from(range.end()))
+                write!(
+                    f,
+                    "unexpected token at {}..{}",
+                    usize::from(range.start()),
+                    usize::from(range.end())
+                )
             }
             ParseError::UnexpectedWanted(got, range, kinds) => write!(
                 f,
                 "unexpected {:?} at {}..{}, wanted any of {:?}",
                 got,
                 usize::from(range.start()),
@@ -54,7 +66,12 @@ impl fmt::Display for ParseError {
                 usize::from(range.end()),
                 kinds
             ),
             ParseError::UnexpectedDoubleBind(range) => {
-                write!(f, "unexpected double bind at {}..{}", usize::from(range.start()), usize::from(range.end()))
+                write!(
+                    f,
+                    "unexpected double bind at {}..{}",
+                    usize::from(range.start()),
+                    usize::from(range.end())
+                )
             }
             ParseError::UnexpectedEOF => write!(f, "unexpected end of file"),
             ParseError::UnexpectedEOFWanted(kinds) => {
@@ -141,6 +158,19 @@ where
         }
     }
 
+    fn at(&mut self, kind: SyntaxKind) -> bool {
+        self.peek().map(|kind_| kind_ == kind).unwrap_or(false)
+    }
+
+    fn at_ts(&mut self, ts: TokenSet) -> bool {
+        self.peek().map(|kind| ts.contains(kind)).unwrap_or(false)
+    }
+
+    fn error(&mut self, msg: &str) {
+        self.errors
+            .push(ParseError::Message(msg.to_string(), TextRange::empty(self.get_text_position())))
+    }
+
     fn get_text_position(&self) -> TextSize {
         self.consumed
     }
@@ -209,49 +239,109 @@ where
     fn peek(&mut self) -> Option<SyntaxKind> {
         self.peek_data().map(|&(t, _)| t)
     }
-    fn expect_peek_any(&mut self, allowed_slice: &[SyntaxKind]) -> Option<SyntaxKind> {
-        let allowed: BitSet256 = allowed_slice.iter().map(|&k| k as u16).collect();
-
-        let next = match self.peek() {
-            None => None,
-            Some(kind) if allowed.contains(kind as usize) => Some(kind),
-            Some(kind) => {
-                let start = self.start_error_node();
-                loop {
-                    self.bump();
-                    if self.peek().map(|kind| allowed.contains(kind as usize)).unwrap_or(true) {
-                        break;
-                    }
-                }
-                let end = self.finish_error_node();
-                self.errors.push(ParseError::UnexpectedWanted(
-                    kind,
-                    TextRange::new(start, end),
-                    allowed_slice.to_vec().into_boxed_slice(),
-                ));
-
-                self.peek()
-            }
-        };
-        if next.is_none() {
-            self.errors
-                .push(ParseError::UnexpectedEOFWanted(allowed_slice.to_vec().into_boxed_slice()));
-        }
-        next
-    }
-    fn expect(&mut self, expected: SyntaxKind) {
-        if self.expect_peek_any(&[expected]).is_some() {
-            self.bump();
-        }
-    }
-    fn expect_ident(&mut self) {
-        if self.expect_peek_any(&[TOKEN_IDENT]).is_some() {
-            self.start_node(NODE_IDENT);
-            self.bump();
-            self.finish_node()
-        }
-    }
+    fn peek_n<const N: usize>(&mut self) -> [Option<SyntaxKind>; N] {
+        let mut peeks = [None; N];
+        for i in 0..N {
+            let mut token;
+            peeks[i] = loop {
+                token = self.iter.next();
+                let kind = token.as_ref().map(|&(t, _)| t);
+                if let Some(token) = token {
+                    self.buffer.push_back(token);
+                }
+                if kind.map(|t| !t.is_trivia()).unwrap_or(true) {
+                    break kind;
+                }
+            };
+        }
+        peeks
+    }
+    fn expect(&mut self, expected: SyntaxKind) -> bool {
+        let peek = self.peek();
+        match peek {
+            Some(kind) if expected == kind => {
+                self.bump();
+                return true;
+            }
+            Some(kind) => self.errors.push(ParseError::UnexpectedWanted(
+                kind,
+                TextRange::empty(self.get_text_position()),
+                [expected].to_vec().into_boxed_slice(),
+            )),
+            None => {
+                self.errors
+                    .push(ParseError::UnexpectedEOFWanted([expected].to_vec().into_boxed_slice()));
+            }
+        }
+        false
+    }
+
+    /// Create an error node around the next token, reporting `message`.
+    pub(crate) fn err_node(&mut self, message: &str) {
+        let start = self.start_error_node();
+        self.bump();
+        self.finish_error_node();
+        self.errors.push(ParseError::Message(
+            message.to_string(),
+            TextRange::new(start, self.get_text_position()),
+        ));
+    }
+
+    /// Report `message` at the current position; unless the next token is a
+    /// brace or in `recovery`, consume it into an error node.
+    pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
+        match self.peek() {
+            None => {
+                self.errors.push(ParseError::Message(
+                    format!("unexpected eof: {}", message),
+                    TextRange::empty(self.get_text_position()),
+                ));
+                return;
+            }
+            Some(T!["{"] | T!["}"]) => {
+                self.errors.push(ParseError::Message(
+                    message.to_string(),
+                    TextRange::empty(self.get_text_position()),
+                ));
+                return;
+            }
+            _ => (),
+        }
+
+        if self.at_ts(recovery) {
+            self.errors.push(ParseError::Message(
+                message.to_string(),
+                TextRange::empty(self.get_text_position()),
+            ));
+            return;
+        }
+
+        let start = self.start_error_node();
+        self.bump();
+        self.errors.push(ParseError::Message(
+            message.to_string(),
+            TextRange::new(start, self.get_text_position()),
+        ));
+        self.finish_error_node();
+    }
+
+    pub(crate) fn err_and_bump(&mut self, message: &str) {
+        self.err_recover(message, TokenSet::EMPTY);
+    }
+
+    /// Bump `kind` wrapped in a `NODE_IDENT` node, or report `msg` and recover.
+    pub(crate) fn expect_bump(&mut self, kind: SyntaxKind, msg: &str) -> bool {
+        if self.at(kind) {
+            self.start_node(NODE_IDENT);
+            self.bump();
+            self.finish_node();
+            true
+        } else {
+            self.err_and_bump(msg);
+            false
+        }
+    }
+    fn expect_ident(&mut self) {
+        self.expect_bump(TOKEN_IDENT, "expected identifier");
+    }
+
     fn parse_dynamic(&mut self) {
         self.start_node(NODE_DYNAMIC);
         self.bump();
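The const-generic peek_n above generalizes the two-token lookahead that used to be hard-coded in parse_val (removed in a later hunk): it pulls tokens off the raw iterator into the buffer, skips trivia, and yields the next N significant kinds. A sketch of the call shape, mirroring the brace disambiguation further down:

    // Inside the parser, after seeing '{': decide between a lambda pattern
    // such as `{ a, b }: ...` and an attrset by peeking two tokens ahead.
    match self.peek_n::<2>() {
        [Some(TOKEN_IDENT), Some(TOKEN_COMMA)] => { /* lambda pattern */ }
        _ => { /* attribute set */ }
    }
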
@@ -266,11 +356,7 @@ where
         self.expect(TOKEN_STRING_START);
 
         loop {
-            match self.expect_peek_any(&[
-                TOKEN_STRING_END,
-                TOKEN_STRING_CONTENT,
-                TOKEN_INTERPOL_START,
-            ]) {
+            match self.peek() {
                 Some(TOKEN_STRING_CONTENT) => self.bump(),
                 Some(TOKEN_INTERPOL_START) => {
                     self.start_node(NODE_STRING_INTERPOL);
@@ -279,7 +365,6 @@ where
                     self.expect(TOKEN_INTERPOL_END);
                     self.finish_node();
                 }
-                // handled by expect_peek_any
                 _ => break,
             }
         }
@@ -287,6 +372,7 @@ where
         self.finish_node();
     }
+
     fn next_attr(&mut self) {
         match self.peek() {
             Some(TOKEN_DYNAMIC_START) => self.parse_dynamic(),
@@ -308,41 +394,42 @@ where
         self.finish_node();
     }
     fn parse_pattern(&mut self, bound: bool) {
-        if self.peek().map(|t| t == TOKEN_CURLY_B_CLOSE).unwrap_or(true) {
-            self.bump();
-        } else {
-            loop {
-                match self.expect_peek_any(&[TOKEN_CURLY_B_CLOSE, TOKEN_ELLIPSIS, TOKEN_IDENT]) {
-                    Some(TOKEN_CURLY_B_CLOSE) => {
-                        self.bump();
-                        break;
-                    }
-                    Some(TOKEN_ELLIPSIS) => {
-                        self.bump();
-                        self.expect(TOKEN_CURLY_B_CLOSE);
-                        break;
-                    }
-                    Some(TOKEN_IDENT) => {
-                        self.start_node(NODE_PAT_ENTRY);
-
-                        self.expect_ident();
-
-                        if let Some(TOKEN_QUESTION) = self.peek() {
-                            self.bump();
-                            self.parse_expr();
-                        }
-                        self.finish_node();
-
-                        match self.peek() {
-                            Some(TOKEN_COMMA) => self.bump(),
-                            _ => {
-                                self.expect(TOKEN_CURLY_B_CLOSE);
-                                break;
-                            }
-                        }
-                    }
-                    // handled by expect_peek_any
-                    _ => break,
+        loop {
+            match self.peek() {
+                Some(TOKEN_CURLY_B_CLOSE) => {
+                    self.bump();
+                    break;
+                }
+                Some(TOKEN_ELLIPSIS) => {
+                    self.bump();
+                    if self.expect(TOKEN_CURLY_B_CLOSE) {
+                        break;
+                    }
+                }
+                Some(TOKEN_IDENT) => {
+                    self.start_node(NODE_PAT_ENTRY);
+
+                    self.expect_ident();
+
+                    if let Some(TOKEN_QUESTION) = self.peek() {
+                        self.bump();
+                        self.parse_expr();
+                    }
+                    self.finish_node();
+
+                    match self.peek() {
+                        Some(TOKEN_COMMA) => self.bump(),
+                        _ if self.expect(TOKEN_CURLY_B_CLOSE) => break,
+                        _ => (),
+                    }
+                }
+                Some(TOKEN_COMMA) => self.err_node("misplaced comma"),
+                Some(_) => self.err_node("expected a pattern"),
+                None => {
+                    self.error("unexpected eof, expected a pattern");
+                    break;
                 }
             }
         }
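With the err_node calls above, parse_pattern reports each misplaced token as its own error node and keeps going, instead of discarding everything up to the closing brace. A hedged illustration of the observable behavior (the exact offsets are a guess):

    // A doubled comma inside a pattern should yield a "misplaced comma"
    // ParseError::Message while the surrounding lambda still parses.
    let ast = rnix::parse("{ a, , b }: a");
    for err in ast.errors() {
        println!("{}", err); // e.g. "misplaced comma at 5..6"
    }
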
@@ -364,6 +451,7 @@ where
             match self.peek() {
                 None => break,
                 token if token == Some(until) => break,
+                Some(TOKEN_SEMICOLON) => self.err_and_bump("misplaced semicolon"),
                 Some(TOKEN_INHERIT) => {
                     self.start_node(NODE_INHERIT);
                     self.bump();
@@ -428,23 +518,7 @@ where
             TOKEN_CURLY_B_OPEN => {
-                // Do a lookahead:
-                let mut peek = [None, None];
-                for i in 0..2 {
-                    let mut token;
-                    peek[i] = loop {
-                        token = self.iter.next();
-                        let kind = token.as_ref().map(|&(t, _)| t);
-                        if let Some(token) = token {
-                            self.buffer.push_back(token);
-                        }
-                        if kind.map(|t| !t.is_trivia()).unwrap_or(true) {
-                            break kind;
-                        }
-                    };
-                }
-
-                match peek {
+                match self.peek_n::<2>() {
                     [Some(TOKEN_IDENT), Some(TOKEN_COMMA)]
                     | [Some(TOKEN_IDENT), Some(TOKEN_QUESTION)]
                     | [Some(TOKEN_IDENT), Some(TOKEN_CURLY_B_CLOSE)]
@@ -517,26 +591,8 @@ where
                 _ => (),
             }
         }
-            kind => {
-                let start = self.start_error_node();
-                self.bump();
-                let end = self.finish_error_node();
-                self.errors.push(ParseError::UnexpectedWanted(
-                    kind,
-                    TextRange::new(start, end),
-                    [
-                        TOKEN_PAREN_OPEN,
-                        TOKEN_REC,
-                        TOKEN_CURLY_B_OPEN,
-                        TOKEN_SQUARE_B_OPEN,
-                        TOKEN_DYNAMIC_START,
-                        TOKEN_STRING_START,
-                        TOKEN_IDENT,
-                    ]
-                    .to_vec()
-                    .into_boxed_slice(),
-                ));
-            }
+            T!["}"] => self.err_node("unmatched right brace"),
+            _ => self.err_and_bump("expected a value"),
         };
 
         while self.peek() == Some(TOKEN_DOT) {
@@ -575,14 +631,15 @@ where
             self.parse_fn()
         }
     }
-    fn handle_operation(
+
+    fn handle_operation_left(
         &mut self,
         once: bool,
         next: fn(&mut Self) -> Checkpoint,
-        ops: &[SyntaxKind],
+        ops: TokenSet,
     ) -> Checkpoint {
         let checkpoint = next(self);
-        while self.peek().map(|t| ops.contains(&t)).unwrap_or(false) {
+        while self.peek().map(|t| ops.contains(t)).unwrap_or(false) {
             self.start_node_at(checkpoint, NODE_BIN_OP);
             self.bump();
             next(self);
@@ -593,17 +650,38 @@ where
         }
         checkpoint
     }
+
+    fn handle_operation_right(
+        &mut self,
+        once: bool,
+        next: fn(&mut Self) -> Checkpoint,
+        ops: TokenSet,
+    ) -> Checkpoint {
+        let checkpoint = next(self);
+        if self.peek().map(|t| ops.contains(t)).unwrap_or(false) {
+            self.start_node_at(checkpoint, NODE_BIN_OP);
+            self.bump();
+            if once {
+                next(self);
+            } else {
+                self.handle_operation_right(once, next, ops);
+            }
+            self.finish_node();
+        }
+        checkpoint
+    }
+
     fn parse_isset(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_negate, &[TOKEN_QUESTION])
+        self.handle_operation_left(false, Self::parse_negate, TOKEN_QUESTION | ())
     }
     fn parse_concat(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_isset, &[TOKEN_CONCAT])
+        self.handle_operation_right(false, Self::parse_isset, TOKEN_CONCAT | ())
     }
     fn parse_mul(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_concat, &[TOKEN_MUL, TOKEN_DIV])
+        self.handle_operation_left(false, Self::parse_concat, TOKEN_MUL | TOKEN_DIV)
    }
     fn parse_add(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_mul, &[TOKEN_ADD, TOKEN_SUB])
+        self.handle_operation_left(false, Self::parse_mul, TOKEN_ADD | TOKEN_SUB)
     }
     fn parse_invert(&mut self) -> Checkpoint {
         if self.peek() == Some(TOKEN_INVERT) {
@@ -618,26 +696,26 @@ where
         }
     }
     fn parse_merge(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_invert, &[TOKEN_UPDATE])
+        self.handle_operation_right(false, Self::parse_invert, TOKEN_UPDATE | ())
     }
     fn parse_compare(&mut self) -> Checkpoint {
-        self.handle_operation(
+        self.handle_operation_left(
             true,
             Self::parse_merge,
-            &[TOKEN_LESS, TOKEN_LESS_OR_EQ, TOKEN_MORE, TOKEN_MORE_OR_EQ],
+            TOKEN_LESS | TOKEN_LESS_OR_EQ | TOKEN_MORE | TOKEN_MORE_OR_EQ,
         )
     }
     fn parse_equal(&mut self) -> Checkpoint {
-        self.handle_operation(true, Self::parse_compare, &[TOKEN_EQUAL, TOKEN_NOT_EQUAL])
+        self.handle_operation_left(true, Self::parse_compare, TOKEN_EQUAL | TOKEN_NOT_EQUAL)
     }
     fn parse_and(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_equal, &[TOKEN_AND])
+        self.handle_operation_left(false, Self::parse_equal, TOKEN_AND | ())
     }
     fn parse_or(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_and, &[TOKEN_OR])
+        self.handle_operation_left(false, Self::parse_and, TOKEN_OR | ())
     }
     fn parse_implication(&mut self) -> Checkpoint {
-        self.handle_operation(false, Self::parse_or, &[TOKEN_IMPLICATION])
+        self.handle_operation_right(false, Self::parse_or, TOKEN_IMPLICATION | ())
     }
     #[inline(always)]
     fn parse_math(&mut self) -> Checkpoint {
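Nix's ++, // and -> operators are right-associative, which the old single handle_operation (a plain while loop) parsed as left-associative; handle_operation_right instead recurses, so the rest of the chain becomes the right operand. The difference in tree shape, roughly:

    // handle_operation_left:  1 - 2 - 3    =>  ((1 - 2) - 3)
    // handle_operation_right: 1 ++ 2 ++ 3  =>  (1 ++ (2 ++ 3))
    let ast = rnix::parse("1 ++ 2 ++ 3");
    // The outer NODE_BIN_OP's right-hand child is itself a NODE_BIN_OP.
    println!("{}", ast.root().dump());
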
@@ -696,6 +774,16 @@ where
                 self.finish_node();
                 checkpoint
             }
+            Some(T!["}"]) => {
+                let start = self.start_error_node();
+                self.bump();
+                self.finish_error_node();
+                self.errors.push(ParseError::Message(
+                    "unmatched right brace".to_string(),
+                    TextRange::new(start, self.get_text_position()),
+                ));
+                self.checkpoint()
+            }
             _ => self.parse_math(),
         }
     }
@@ -729,6 +817,39 @@ mod tests {
 
     use std::{ffi::OsStr, fmt::Write, fs, path::PathBuf};
 
+    /// Parsing any valid UTF-8 input must not panic, even if it reports errors.
+    fn check_parser(bytes: &[u8]) {
+        if let Ok(s) = std::str::from_utf8(bytes) {
+            let _ = crate::parse(s);
+        }
+    }
+
+    #[test]
+    fn fuzzer_regressions() {
+        check_parser(&[91, 27, 27, 127, 27, 125]);
+        check_parser(&[91, 125, 0]);
+        check_parser(&[
+            116, 123, 105, 110, 104, 101, 114, 105, 116, 1, 0, 0, 0, 0, 0, 24, 101, 114, 105, 116,
+            41, 116, 123, 62, 89, 108, 89, 108, 101, 125, 125, 123,
+        ]);
+        check_parser(&[91, 45]);
+        check_parser(&[89, 64, 60, 44, 45, 45, 58]);
+        check_parser(&[116, 64, 91, 123, 49, 91, 91, 91, 49, 91, 91, 26]);
+        check_parser(b"+ 1");
+    }
+
+    #[test]
+    fn right_associative_operators() {
+        // `->` and `++` chains must parse cleanly (and right-associatively).
+        assert!(crate::parse("true -> true -> true -> true").errors().is_empty());
+        assert!(crate::parse("1 ++ 2 ++ 3 ++ 4").errors().is_empty());
+    }
+
     #[test]
     fn whitespace_attachment_for_incomplete_code1() {
         let code = "{
@@ -836,6 +957,7 @@ NODE_ROOT 0..5 {
 
     #[rustfmt::skip]
     mod dir_tests {
         use super::test_dir;
+
         #[test]
         fn general() {
             test_dir("general");
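src/token_set.rs below is what replaces the dropped cbitset dependency: a TokenSet is a u128 bitmask, which assumes every SyntaxKind discriminant stays below 128. The BitOr impls let call sites assemble sets inline, and the slightly odd "kind | ()" spelling builds a one-element set, since a plain | always needs two operands to pick TokenSet as its output type. Illustratively:

    // Building and querying sets at a call site (illustrative kinds).
    let ops = TOKEN_MUL | TOKEN_DIV;   // SyntaxKind | SyntaxKind -> TokenSet
    let one = TOKEN_QUESTION | ();     // single-kind TokenSet via the () impl
    assert!(ops.contains(TOKEN_DIV));
    assert!(!ops.contains(TOKEN_QUESTION));
    assert!(one.contains(TOKEN_QUESTION));
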
diff --git a/src/token_set.rs b/src/token_set.rs
new file mode 100644
index 0000000..dda8370
--- /dev/null
+++ b/src/token_set.rs
@@ -0,0 +1,85 @@
+use std::ops;
+
+use crate::SyntaxKind;
+
+/// A bit-set of `SyntaxKind`s
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct TokenSet(u128);
+
+impl TokenSet {
+    pub(crate) const EMPTY: TokenSet = TokenSet(0);
+
+    pub(crate) const fn new(kind: SyntaxKind) -> TokenSet {
+        TokenSet(mask(kind))
+    }
+
+    pub(crate) const fn from_slice(kinds: &[SyntaxKind]) -> TokenSet {
+        let mut res = 0u128;
+        let mut i = 0;
+        while i < kinds.len() {
+            res |= mask(kinds[i]);
+            i += 1
+        }
+        TokenSet(res)
+    }
+
+    pub(crate) const fn union(self, other: TokenSet) -> TokenSet {
+        TokenSet(self.0 | other.0)
+    }
+
+    pub(crate) const fn contains(&self, kind: SyntaxKind) -> bool {
+        self.0 & mask(kind) != 0
+    }
+}
+
+/// Every `SyntaxKind` discriminant must be below 128 to fit in the `u128` mask.
+const fn mask(kind: SyntaxKind) -> u128 {
+    1u128 << (kind as usize)
+}
+
+impl ops::BitOr for SyntaxKind {
+    type Output = TokenSet;
+
+    fn bitor(self, rhs: Self) -> Self::Output {
+        TokenSet(mask(self) | mask(rhs))
+    }
+}
+
+impl ops::BitOr<SyntaxKind> for TokenSet {
+    type Output = TokenSet;
+
+    fn bitor(self, rhs: SyntaxKind) -> Self::Output {
+        self.union(TokenSet(mask(rhs)))
+    }
+}
+
+impl ops::BitOr<TokenSet> for SyntaxKind {
+    type Output = TokenSet;
+
+    fn bitor(self, rhs: TokenSet) -> Self::Output {
+        TokenSet(mask(self)).union(rhs)
+    }
+}
+
+impl ops::BitOr for TokenSet {
+    type Output = TokenSet;
+
+    fn bitor(self, rhs: TokenSet) -> Self::Output {
+        self.union(rhs)
+    }
+}
+
+impl ops::BitOr<SyntaxKind> for () {
+    type Output = TokenSet;
+
+    fn bitor(self, rhs: SyntaxKind) -> Self::Output {
+        TokenSet::new(rhs)
+    }
+}
+
+impl ops::BitOr<()> for SyntaxKind {
+    type Output = TokenSet;
+
+    fn bitor(self, (): ()) -> Self::Output {
+        TokenSet::new(self)
+    }
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ecdf0ed..e8c83d3 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -430,18 +430,18 @@ impl<'a> Iterator for Tokenizer<'a> {
     }
 }
 
+pub fn tokenize(input: &str) -> Vec<(SyntaxKind, SmolStr)> {
+    Tokenizer::new(input).collect()
+}
+
 #[cfg(test)]
 mod tests {
     use super::{
+        tokenize,
         SyntaxKind::{self, *},
-        Tokenizer,
     };
     use smol_str::SmolStr;
 
-    fn tokenize(input: &str) -> Vec<(SyntaxKind, SmolStr)> {
-        Tokenizer::new(input).collect()
-    }
-
     macro_rules! tokens {
         ($(($token:expr, $str:expr),)*) => {
             vec![$(($token, $str.into()),)*]
diff --git a/src/types.rs b/src/types.rs
index b5aa033..d0f68b8 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -127,7 +127,12 @@ impl fmt::Display for TextDump {
             if let NodeOrToken::Token(token) = enter {
                 write!(f, "(\"{}\")", token.text().escape_default())?
             }
-            write!(f, " {}..{}", usize::from(enter.text_range().start()), usize::from(enter.text_range().end()))?;
+            write!(
+                f,
+                " {}..{}",
+                usize::from(enter.text_range().start()),
+                usize::from(enter.text_range().end())
+            )?;
             if let NodeOrToken::Node(_) = enter {
                 write!(f, " {{")?;
             }
diff --git a/test_data/parser/association/1.expect b/test_data/parser/association/1.expect
new file mode 100644
index 0000000..e69de29
diff --git a/test_data/parser/association/1.nix b/test_data/parser/association/1.nix
new file mode 100644
index 0000000..8a2a3c5
--- /dev/null
+++ b/test_data/parser/association/1.nix
@@ -0,0 +1 @@
+1 + 2 + 3
\ No newline at end of file
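Finally, tokenize in src/tokenizer.rs above is promoted from a test-only helper to a public function, so the lexer fuzz target (and any other tooling) can drive the tokenizer without building a syntax tree. A minimal sketch of calling it:

    // Lex a snippet standalone; each token is a (SyntaxKind, SmolStr) pair.
    for (kind, text) in rnix::tokenizer::tokenize("a + b") {
        println!("{:?} {:?}", kind, text);
    }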