Skip to content

Commit

Permalink
Define the escapes lexically
Browse files Browse the repository at this point in the history
  • Loading branch information
stasm committed Oct 23, 2018
1 parent 11b5a8e commit a782d22
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 31 deletions.
27 changes: 14 additions & 13 deletions spec/fluent.ebnf
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ InlineExpression ::= StringLiteral
| inline_placeable

/* Literals */
StringLiteral ::= "\"" quoted_text_char* "\""
StringLiteral ::= "\"" quoted_char* "\""
NumberLiteral ::= "-"? digit+ ("." digit+)?

/* Inline Expressions */
Expand Down Expand Up @@ -88,23 +88,24 @@ identifier ::= [a-zA-Z] [a-zA-Z0-9_-]*

/* Characters */
/* Any Unicode character excluding C0 control characters (but including tab),
* space, surrogate blocks and non-characters (U+FFFE, U+FFFF).
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char */
regular_char ::= [\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}]
regular_char ::= [\\u{9}\\u{20}-\\u{D7FF}\\u{E000}-\\u{FFFD}]
| [\\u{10000}-\\u{10FFFF}]
/* The opening brace in text starts a placeable. */
text_char ::= (regular_char - "{")
| "\u0020"
special_text_char ::= "{"
/* Double quote and backslash need to be escaped in string literals. */
special_quoted_char ::= "\""
| "\\"
text_char ::= regular_char - special_text_char
/* Indented text may not start with characters which mark its end. */
indented_char ::= text_char - "}" - "[" - "*" - "."
/* Backslash can be used to escape the double quote and the backslash itself.
* The literal opening brace { is allowed because StringLiterals may not have
* placeables. \uXXXX Unicode escape sequences are recognized, too. */
quoted_text_char ::= (text_char - "\"" - "\\")
| /\\u[0-9a-fA-F]{4}/
| "{"
| "\\\\"
| "\\\""
/* Escape sequences inside string literals: a backslash followed by a
 * special_quoted_char, or \u followed by exactly four hexadecimal digits. */
literal_escape ::= "\\" special_quoted_char
unicode_escape ::= "\\u" /[0-9a-fA-F]{4}/
/* The literal opening brace { is allowed in string literals because they may
 * not have placeables. Double quote and backslash are only accepted in their
 * escaped form (literal_escape). */
quoted_char ::= (text_char - special_quoted_char)
| special_text_char
| literal_escape
| unicode_escape
digit ::= [0-9]

/* Whitespace */
Expand Down
52 changes: 34 additions & 18 deletions syntax/grammar.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ let InlineExpression = defer(() =>
let StringLiteral = defer(() =>
sequence(
string("\""),
repeat(quoted_text_char),
repeat(quoted_char),
string("\""))
.map(element_at(1))
.map(join)
Expand Down Expand Up @@ -381,20 +381,27 @@ let identifier =
/* Characters */

/* Any Unicode character excluding C0 control characters (but including tab),
* space, surrogate blocks and non-characters (U+FFFE, U+FFFF).
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char */
let regular_char =
either(
charset("\\u{9}\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}"),
charset("\\u{9}\\u{20}-\\u{D7FF}\\u{E000}-\\u{FFFD}"),
charset("\\u{10000}-\\u{10FFFF}"));

/* The opening brace in text starts a placeable. */
let text_char =
/* The single character that is special in text. text_char excludes it, while
 * quoted_char accepts it. */
let special_text_char =
string("{");

/* Double quote and backslash need to be escaped in string literals. */
let special_quoted_char =
either(
and(
not(string("{")),
regular_char),
string("\u0020"));
string("\""),
string("\\"));

/* Any regular_char that is not a special_text_char; mirrors
 * `text_char ::= regular_char - special_text_char` in spec/fluent.ebnf. */
let text_char =
and(
not(special_text_char),
regular_char);

/* Indented text may not start with characters which mark its end. */
let indented_char =
Expand All @@ -405,19 +412,28 @@ let indented_char =
not(string("}")),
text_char);

/* Backslash can be used to escape the double quote and the backslash itself.
* The literal opening brace { is allowed because StringLiterals may not have
* placeables. \uXXXX Unicode escape sequences are recognized, too. */
let quoted_text_char =
/* A backslash followed by a special_quoted_char (double quote or backslash).
 * NOTE(review): the matched parts are passed through `join` -- presumably
 * concatenated into one string; confirm against the combinator library. */
let literal_escape =
sequence(
string("\\"),
special_quoted_char)
.map(join);

/* The two characters \u followed by exactly four hexadecimal digits.
 * NOTE(review): the matched parts are passed through `join` -- presumably
 * concatenated into one string; confirm against the combinator library. */
let unicode_escape =
sequence(
string("\\u"),
regex(/[0-9a-fA-F]{4}/))
.map(join);

/* The literal opening brace { is allowed in string literals because they may
* not have placeables. */
let quoted_char =
either(
and(
not(string("\\")),
not(string("\"")),
not(special_quoted_char),
text_char),
regex(/\\u[0-9a-fA-F]{4}/),
string("{"),
string("\\\\"),
string("\\\""));
special_text_char,
literal_escape,
unicode_escape);

/* A single ASCII digit, 0 through 9; mirrors `digit ::= [0-9]` in
 * spec/fluent.ebnf. */
let digit = charset("0-9");

Expand Down

0 comments on commit a782d22

Please sign in to comment.