diff --git a/spec/fluent.ebnf b/spec/fluent.ebnf index f0a1fa8..2982e7d 100644 --- a/spec/fluent.ebnf +++ b/spec/fluent.ebnf @@ -57,7 +57,7 @@ InlineExpression ::= StringLiteral | inline_placeable /* Literals */ -StringLiteral ::= quote quoted_text_char* quote +StringLiteral ::= "\"" quoted_text_char* "\"" NumberLiteral ::= "-"? digit+ ("." digit+)? /* Inline Expressions */ @@ -87,22 +87,24 @@ Function ::= [A-Z] [A-Z_?-]* identifier ::= [a-zA-Z] [a-zA-Z0-9_-]* /* Characters */ -backslash ::= "\\" -quote ::= "\"" -/* Any Unicode character from BMP excluding C0 control characters, space, - * surrogate blocks and non-characters (U+FFFE, U+FFFF). - * Cf. https://www.w3.org/TR/REC-xml/#NT-Char - */ -regular_char ::= [\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}] +/* Any Unicode character excluding C0 control characters (but including tab), + * space, surrogate blocks and non-characters (U+FFFE, U+FFFF). + * Cf. https://www.w3.org/TR/REC-xml/#NT-Char */ +regular_char ::= [\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}] + | [\\u{10000}-\\u{10FFFF}] +/* The opening brace in text starts a placeable. */ text_char ::= blank_inline - | "\u0009" - | /\\u[0-9a-fA-F]{4}/ - | (backslash backslash) - | (backslash "{") - | (regular_char - "{" - backslash) + | (regular_char - "{") +/* Indented text may not start with characters which mark its end. */ indented_char ::= text_char - "}" - "[" - "*" - "." -quoted_text_char ::= (text_char - quote) - | (backslash quote) +/* Backslash can be used to escape the double quote and the backslash itself. + * The literal opening brace { is allowed because StringLiterals may not have + * placeables. \uXXXX Unicode escape sequences are recognized, too. */ +quoted_text_char ::= /\\u[0-9a-fA-F]{4}/ + | "{" + | "\\\\" + | "\\\"" + | (text_char - "\"" - "\\") digit ::= [0-9] /* Whitespace */ diff --git a/syntax/grammar.mjs b/syntax/grammar.mjs index 17a35f4..de927f4 100644 --- a/syntax/grammar.mjs +++ b/syntax/grammar.mjs @@ -198,9 +198,9 @@ let InlineExpression = defer(() => /* Literals */ let StringLiteral = defer(() => sequence( - quote, + string("\""), repeat(quoted_text_char), - quote) + string("\"")) .map(element_at(1)) .map(join) .chain(into(FTL.StringLiteral))); @@ -380,32 +380,23 @@ let identifier = /* ---------- */ /* Characters */ -let backslash = string("\\"); -let quote = string("\""); - -/* Any Unicode character from BMP excluding C0 control characters, space, - * surrogate blocks and non-characters (U+FFFE, U+FFFF). - * Cf. https://www.w3.org/TR/REC-xml/#NT-Char - */ +/* Any Unicode character excluding C0 control characters (but including tab), + * space, surrogate blocks and non-characters (U+FFFE, U+FFFF). + * Cf. https://www.w3.org/TR/REC-xml/#NT-Char */ let regular_char = - charset("\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}"); + either( + charset("\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}"), + charset("\\u{10000}-\\u{10FFFF}")); +/* The opening brace in text starts a placeable. */ let text_char = defer(() => either( blank_inline, - string("\u0009"), - regex(/\\u[0-9a-fA-F]{4}/), - sequence( - backslash, - backslash).map(join), - sequence( - backslash, - string("{")).map(join), and( - not(backslash), not(string("{")), regular_char))); +/* Indented text may not start with characters which mark its end. */ let indented_char = defer(() => and( not(string(".")), @@ -414,14 +405,19 @@ let indented_char = defer(() => not(string("}")), text_char)); +/* Backslash can be used to escape the double quote and the backslash itself. + * The literal opening brace { is allowed because StringLiterals may not have + * placeables. \uXXXX Unicode escape sequences are recognized, too. */ let quoted_text_char = either( + regex(/\\u[0-9a-fA-F]{4}/), + string("{"), + string("\\\\"), + string("\\\""), and( - not(quote), - text_char), - sequence( - backslash, - quote).map(join)); + not(string("\\")), + not(string("\"")), + text_char)); let digit = charset("0-9"); diff --git a/test/fixtures/escaped_characters.ftl b/test/fixtures/escaped_characters.ftl index d3a5a07..3c64fce 100644 --- a/test/fixtures/escaped_characters.ftl +++ b/test/fixtures/escaped_characters.ftl @@ -1,9 +1,22 @@ -backslash = Value with \\ (an escaped backslash) -closing-brace = Value with \{ (a closing brace) -unicode-escape = \u0041 -escaped-unicode = \\u0041 +## Literal text +text-backslash-one = Value with \ a backslash +text-backslash-two = Value with \\ two backslashes +text-backslash-brace = Value with \{placeable} +text-backslash-u = \u0041 +text-backslash-backslash-u = \\u0041 -## String Expressions +## String literals quote-in-string = {"\""} backslash-in-string = {"\\"} +# ERROR Mismatched quote mismatched-quote = {"\\""} +# ERROR Unknown escape +unknown-escape = {"\x"} + +## Unicode escapes +string-unicode-sequence = {"\u0041"} +string-escaped-unicode = {"\\u0041"} + +## Literal braces +brace-open = An opening {"{"} brace. +brace-close = A closing } brace. diff --git a/test/fixtures/escaped_characters.json b/test/fixtures/escaped_characters.json index 5602775..6c26f82 100644 --- a/test/fixtures/escaped_characters.json +++ b/test/fixtures/escaped_characters.json @@ -1,18 +1,22 @@ { "type": "Resource", "body": [ + { + "type": "GroupComment", + "content": "Literal text" + }, { "type": "Message", "id": { "type": "Identifier", - "name": "backslash" + "name": "text-backslash-one" }, "value": { "type": "Pattern", "elements": [ { "type": "TextElement", - "value": "Value with \\\\ (an escaped backslash)" + "value": "Value with \\ a backslash" } ] }, @@ -23,14 +27,14 @@ "type": "Message", "id": { "type": "Identifier", - "name": "closing-brace" + "name": "text-backslash-two" }, "value": { "type": "Pattern", "elements": [ { "type": "TextElement", - "value": "Value with \\{ (a closing brace)" + "value": "Value with \\\\ two backslashes" } ] }, @@ -41,7 +45,35 @@ "type": "Message", "id": { "type": "Identifier", - "name": "unicode-escape" + "name": "text-backslash-brace" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Value with \\" + }, + { + "type": "Placeable", + "expression": { + "type": "MessageReference", + "id": { + "type": "Identifier", + "name": "placeable" + } + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "text-backslash-u" }, "value": { "type": "Pattern", @@ -59,7 +91,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "escaped-unicode" + "name": "text-backslash-backslash-u" }, "value": { "type": "Pattern", @@ -75,7 +107,7 @@ }, { "type": "GroupComment", - "content": "String Expressions" + "content": "String literals" }, { "type": "Message", @@ -119,10 +151,120 @@ "attributes": [], "comment": null }, + { + "type": "Comment", + "content": "ERROR Mismatched quote" + }, { "type": "Junk", "annotations": [], "content": "mismatched-quote = {\"\\\\\"\"}\n" + }, + { + "type": "Comment", + "content": "ERROR Unknown escape" + }, + { + "type": "Junk", + "annotations": [], + "content": "unknown-escape = {\"\\x\"}\n" + }, + { + "type": "GroupComment", + "content": "Unicode escapes" + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-unicode-sequence" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\u0041" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-escaped-unicode" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\\\u0041" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "GroupComment", + "content": "Literal braces" + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "brace-open" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "An opening " + }, + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "{" + } + }, + { + "type": "TextElement", + "value": " brace." + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "brace-close" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "A closing } brace." + } + ] + }, + "attributes": [], + "comment": null } ] }