From ebef162038fc5be5b4ad73634c2c86b9bdc6386f Mon Sep 17 00:00:00 2001 From: Niels Saurer Date: Sun, 21 Nov 2021 14:46:56 +0100 Subject: [PATCH] Fix /* */ multiline comment lexer example The previous multiline comments example captures everything between the first `/*` and the last `*/`, unless I'm mistaken. See, for example, the grammar below: it parses `/* */ poc1 /* */ poc2` into `poc2`, but it should be `poc1`. This is fixed by the updated regex (from https://stackoverflow.com/a/36328890). ``` grammar(); match { r"\s*" => { }, // The default whitespace skipping is disabled an `ignore pattern` is specified r"//[^\n\r]*[\n\r]*" => { }, // Skip `// comments` r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { }, // Skip `/* comments */` _ } pub Poc: String = { => <>, Poc => <>, } Name: String = r"[A-Za-z_][A-Za-z0-9_]*!?" => (<>).to_owned(); ``` --- doc/src/lexer_tutorial/001_lexer_gen.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/lexer_tutorial/001_lexer_gen.md b/doc/src/lexer_tutorial/001_lexer_gen.md index af4725bca..62bebc207 100644 --- a/doc/src/lexer_tutorial/001_lexer_gen.md +++ b/doc/src/lexer_tutorial/001_lexer_gen.md @@ -330,7 +330,7 @@ To this end `ignore patterns` can be specified. match { r"\s*" => { }, // The default whitespace skipping is disabled an `ignore pattern` is specified r"//[^\n\r]*[\n\r]*" => { }, // Skip `// comments` - r"/\*([^\*]*\*+[^\*/])*([^\*]*\*+|[^\*])*\*/" => { }, // Skip `/* comments */` + r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/" => { }, // Skip `/* comments */` } ```