Skip to content

Commit

Permalink
Treat backslash as normal char in TextElements
Browse files Browse the repository at this point in the history
  • Loading branch information
stasm committed Oct 16, 2018
1 parent 7a24f9b commit 59c2cda
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 51 deletions.
32 changes: 17 additions & 15 deletions spec/fluent.ebnf
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ InlineExpression ::= StringLiteral
| inline_placeable

/* Literals */
StringLiteral ::= quote quoted_text_char* quote
StringLiteral ::= "\"" quoted_text_char* "\""
NumberLiteral ::= "-"? digit+ ("." digit+)?

/* Inline Expressions */
Expand Down Expand Up @@ -87,22 +87,24 @@ Function ::= [A-Z] [A-Z_?-]*
identifier ::= [a-zA-Z] [a-zA-Z0-9_-]*

/* Characters */
backslash ::= "\\"
quote ::= "\""
/* Any Unicode character from BMP excluding C0 control characters, space,
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char
*/
regular_char ::= [\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}]
/* Any Unicode character excluding C0 control characters (but including tab),
* space, surrogate blocks and non-characters (U+FFFE, U+FFFF).
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char */
regular_char ::= [\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}]
| [\\u{10000}-\\u{10FFFF}]
/* The opening brace in text starts a placeable. */
text_char ::= blank_inline
| "\u0009"
| /\\u[0-9a-fA-F]{4}/
| (backslash backslash)
| (backslash "{")
| (regular_char - "{" - backslash)
| (regular_char - "{")
/* Indented text may not start with characters which mark its end. */
indented_char ::= text_char - "}" - "[" - "*" - "."
quoted_text_char ::= (text_char - quote)
| (backslash quote)
/* Backslash can be used to escape the double quote and the backslash itself.
* The literal opening brace { is allowed because StringLiterals may not have
* placeables. \uXXXX Unicode escape sequences are recognized, too. */
quoted_text_char ::= /\\u[0-9a-fA-F]{4}/
| "{"
| "\\\\"
| "\\\""
| (text_char - "\"" - "\\")
digit ::= [0-9]

/* Whitespace */
Expand Down
44 changes: 20 additions & 24 deletions syntax/grammar.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ let InlineExpression = defer(() =>
/* Literals */
let StringLiteral = defer(() =>
sequence(
quote,
string("\""),
repeat(quoted_text_char),
quote)
string("\""))
.map(element_at(1))
.map(join)
.chain(into(FTL.StringLiteral)));
Expand Down Expand Up @@ -380,32 +380,23 @@ let identifier =
/* ---------- */
/* Characters */

let backslash = string("\\");
let quote = string("\"");

/* Any Unicode character from BMP excluding C0 control characters, space,
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char
*/
/* Any Unicode character excluding C0 control characters (but including tab),
* space, surrogate blocks and non-characters (U+FFFE, U+FFFF).
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char */
let regular_char =
charset("\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}");
either(
charset("\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}"),
charset("\\u{10000}-\\u{10FFFF}"));

/* The opening brace in text starts a placeable. */
let text_char = defer(() =>
either(
blank_inline,
string("\u0009"),
regex(/\\u[0-9a-fA-F]{4}/),
sequence(
backslash,
backslash).map(join),
sequence(
backslash,
string("{")).map(join),
and(
not(backslash),
not(string("{")),
regular_char)));

/* Indented text may not start with characters which mark its end. */
let indented_char = defer(() =>
and(
not(string(".")),
Expand All @@ -414,14 +405,19 @@ let indented_char = defer(() =>
not(string("}")),
text_char));

/* Backslash can be used to escape the double quote and the backslash itself.
* The literal opening brace { is allowed because StringLiterals may not have
* placeables. \uXXXX Unicode escape sequences are recognized, too. */
let quoted_text_char =
either(
regex(/\\u[0-9a-fA-F]{4}/),
string("{"),
string("\\\\"),
string("\\\""),
and(
not(quote),
text_char),
sequence(
backslash,
quote).map(join));
not(string("\\")),
not(string("\"")),
text_char));

let digit = charset("0-9");

Expand Down
23 changes: 18 additions & 5 deletions test/fixtures/escaped_characters.ftl
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
backslash = Value with \\ (an escaped backslash)
closing-brace = Value with \{ (a closing brace)
unicode-escape = \u0041
escaped-unicode = \\u0041
## Literal text
text-backslash-one = Value with \ a backslash
text-backslash-two = Value with \\ two backslashes
text-backslash-brace = Value with \{placeable}
text-backslash-u = \u0041
text-backslash-backslash-u = \\u0041
## String Expressions
## String literals
quote-in-string = {"\""}
backslash-in-string = {"\\"}
# ERROR Mismatched quote
mismatched-quote = {"\\""}
# ERROR Unknown escape
unknown-escape = {"\x"}
## Unicode escapes
string-unicode-sequence = {"\u0041"}
string-escaped-unicode = {"\\u0041"}
## Literal braces
brace-open = An opening {"{"} brace.
brace-close = A closing } brace.
156 changes: 149 additions & 7 deletions test/fixtures/escaped_characters.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
{
"type": "Resource",
"body": [
{
"type": "GroupComment",
"content": "Literal text"
},
{
"type": "Message",
"id": {
"type": "Identifier",
"name": "backslash"
"name": "text-backslash-one"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "TextElement",
"value": "Value with \\\\ (an escaped backslash)"
"value": "Value with \\ a backslash"
}
]
},
Expand All @@ -23,14 +27,14 @@
"type": "Message",
"id": {
"type": "Identifier",
"name": "closing-brace"
"name": "text-backslash-two"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "TextElement",
"value": "Value with \\{ (a closing brace)"
"value": "Value with \\\\ two backslashes"
}
]
},
Expand All @@ -41,7 +45,35 @@
"type": "Message",
"id": {
"type": "Identifier",
"name": "unicode-escape"
"name": "text-backslash-brace"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "TextElement",
"value": "Value with \\"
},
{
"type": "Placeable",
"expression": {
"type": "MessageReference",
"id": {
"type": "Identifier",
"name": "placeable"
}
}
}
]
},
"attributes": [],
"comment": null
},
{
"type": "Message",
"id": {
"type": "Identifier",
"name": "text-backslash-u"
},
"value": {
"type": "Pattern",
Expand All @@ -59,7 +91,7 @@
"type": "Message",
"id": {
"type": "Identifier",
"name": "escaped-unicode"
"name": "text-backslash-backslash-u"
},
"value": {
"type": "Pattern",
Expand All @@ -75,7 +107,7 @@
},
{
"type": "GroupComment",
"content": "String Expressions"
"content": "String literals"
},
{
"type": "Message",
Expand Down Expand Up @@ -119,10 +151,120 @@
"attributes": [],
"comment": null
},
{
"type": "Comment",
"content": "ERROR Mismatched quote"
},
{
"type": "Junk",
"annotations": [],
"content": "mismatched-quote = {\"\\\\\"\"}\n"
},
{
"type": "Comment",
"content": "ERROR Unknown escape"
},
{
"type": "Junk",
"annotations": [],
"content": "unknown-escape = {\"\\x\"}\n"
},
{
"type": "GroupComment",
"content": "Unicode escapes"
},
{
"type": "Message",
"id": {
"type": "Identifier",
"name": "string-unicode-sequence"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "Placeable",
"expression": {
"type": "StringLiteral",
"value": "\\u0041"
}
}
]
},
"attributes": [],
"comment": null
},
{
"type": "Message",
"id": {
"type": "Identifier",
"name": "string-escaped-unicode"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "Placeable",
"expression": {
"type": "StringLiteral",
"value": "\\\\u0041"
}
}
]
},
"attributes": [],
"comment": null
},
{
"type": "GroupComment",
"content": "Literal braces"
},
{
"type": "Message",
"id": {
"type": "Identifier",
"name": "brace-open"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "TextElement",
"value": "An opening "
},
{
"type": "Placeable",
"expression": {
"type": "StringLiteral",
"value": "{"
}
},
{
"type": "TextElement",
"value": " brace."
}
]
},
"attributes": [],
"comment": null
},
{
"type": "Message",
"id": {
"type": "Identifier",
"name": "brace-close"
},
"value": {
"type": "Pattern",
"elements": [
{
"type": "TextElement",
"value": "A closing } brace."
}
]
},
"attributes": [],
"comment": null
}
]
}

0 comments on commit 59c2cda

Please sign in to comment.