From 1588b1f5fd9d7807cf25f6d06a58770ee09f1af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 28 Nov 2024 22:39:19 -0800 Subject: [PATCH] get rid of syntactically significant unicode equals signs (#400) Fixes: #399 --- CHANGELOG.md | 4 ---- README.md | 17 +++++++------ SPEC.md | 24 ++++--------------- .../expected_kdl/unicode_equals_signs.kdl | 1 - .../test_cases/expected_kdl/unicode_silly.kdl | 1 + .../test_cases/input/unicode_equals_signs.kdl | 4 ---- tests/test_cases/input/unicode_silly.kd | 1 + 7 files changed, 15 insertions(+), 37 deletions(-) delete mode 100644 tests/test_cases/expected_kdl/unicode_equals_signs.kdl create mode 100644 tests/test_cases/expected_kdl/unicode_silly.kdl delete mode 100644 tests/test_cases/input/unicode_equals_signs.kdl create mode 100644 tests/test_cases/input/unicode_silly.kd diff --git a/CHANGELOG.md b/CHANGELOG.md index fdf4140..abd18b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,10 +59,6 @@ whitespace matching the whitespace prefix of the closing line. Multiline strings and raw strings now must have a newline immediately following their opening `"`, and a final newline plus whitespace preceding the closing `"`. -* SMALL EQUALS SIGN (`U+FE66`), FULLWIDTH EQUALS SIGN (`U+FF1D`), and HEAVY - EQUALS SIGN (`U+1F7F0`) are now treated the same as `=` and can be used for - properties (e.g. `お名前＝☜(゚ヮ゚☜)`). They are also no longer valid in bare - identifiers. * `.1`, `+.1` etc are no longer valid identifiers, to prevent confusion and conflicts with numbers. * Multi-line strings' literal Newline sequences are now normalized to single diff --git a/README.md b/README.md index ed3d9ab..a390762 100644 --- a/README.md +++ b/README.md @@ -158,11 +158,10 @@ node3 #"C:\Users\zkat\raw\string"# You don't have to quote strings unless any the following apply: * The string contains whitespace. - * The string contains any of `[]{}()\/#";`. - * The string is one of `true`, `false`, or `null`. 
+ * The string contains any of `[]{}()\/#";=`. + * The string is one of `true`, `false`, `null`, `inf`, `-inf`, or `nan`. * The strings starts with a digit, or `+`/`-`/`.`/`-.`,`+.` and a digit. - * The string contains an equals sign (including unicode equals signs `﹦`, - `＝`, and `🟰`). + (aka "looks like a number") In essence, if it can get confused for other KDL or KQL syntax, it needs quotes. @@ -296,8 +295,8 @@ smile 😁 // Identifiers are very flexible. The following is a legal bare identifier: <@foo123~!$%^&*.:'|?+> -// And you can also use unicode, even for the equals sign! -ノード お名前＝☜(゚ヮ゚☜) +// And you can also use unicode! +ノード お名前=ฅ^•ﻌ•^ฅ // kdl specifically allows properties and values to be // interspersed with each other, much like CLI commands. @@ -335,9 +334,9 @@ SDLang, but that had some design choices I disagreed with. #### Ok, then, why not SDLang? -SDLang is designed for use cases that are not interesting to me, but are very -relevant to the D-lang community. KDL is very similar in many ways, but is -different in the following ways: +SDLang is an excellent base, but I wanted some details ironed out, and some +things removed that only really made sense for SDLang's current use-cases, including +some restrictions about data representation. KDL is very similar in many ways, except: * The grammar and expected semantics are [well-defined and specified](SPEC.md). * There is only one "number" type. KDL does not prescribe representations. diff --git a/SPEC.md b/SPEC.md index e09582a..c812c4a 100644 --- a/SPEC.md +++ b/SPEC.md @@ -112,8 +112,8 @@ my-node 1 2 \ // comments are ok after \ ### Property A Property is a key/value pair attached to a [Node](#node). A Property is -composed of a [String](#string), followed immediately by an [equals -sign](#equals-sign), and then a [Value](#value). +composed of a [String](#string), followed immediately by an equals sign (`=`, `U+003D`), +and then a [Value](#value). 
Properties should be interpreted left-to-right, with rightmost properties with identical names overriding earlier properties. That is: @@ -131,17 +131,6 @@ still be spec-compliant. Properties _MAY_ be prefixed with `/-` to "comment out" the entire token and make it act as plain whitespace, even if it spreads across multiple lines. -#### Equals Sign - -Any of the following characters may be used as equals signs in properties: - -| Name | Character | Code Point | -|----|-----|----| -| EQUALS SIGN | `=` | `U+003D` | -| SMALL EQUALS SIGN | `﹦` | `U+FE66` | -| FULLWIDTH EQUALS SIGN | `＝` | `U+FF1D` | -| HEAVY EQUALS SIGN | `🟰` | `U+1F7F0` | - ### Argument An Argument is a bare [Value](#value) attached to a [Node](#node), with no @@ -334,8 +323,7 @@ negative number. The following characters cannot be used anywhere in a [Identifier String](#identifier-string): -* Any of `(){}[]/\"#;` -* Any [Equals Sign](#equals-sign) +* Any of `(){}[]/\"#;=` * Any [Whitespace](#whitespace) or [Newline](#newline). * Any [disallowed literal code points](#disallowed-literal-code-points) in KDL documents. @@ -780,19 +768,17 @@ node-prop-or-arg := prop | value node-children := '{' nodes final-node? '}' node-terminator := single-line-comment | newline | ';' | eof -prop := string optional-node-space equals-sign optional-node-space value +prop := string optional-node-space '=' optional-node-space value value := type? optional-node-space (string | number | keyword) type := '(' optional-node-space string optional-node-space ')' -equals-sign := See Table ([Equals Sign](#equals-sign)) - string := identifier-string | quoted-string | raw-string identifier-string := unambiguous-ident | signed-ident | dotted-ident unambiguous-ident := ((identifier-char - digit - sign - '.') identifier-char*) - 'true' - 'false' - 'null' - 'inf' - '-inf' - 'nan' signed-ident := sign ((identifier-char - digit - '.') identifier-char*)? dotted-ident := sign? '.' ((identifier-char - digit) identifier-char*)? 
-identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#] - disallowed-literal-code-points - equals-sign +identifier-char := unicode - unicode-space - newline - [\\/(){};\[\]"#=] - disallowed-literal-code-points quoted-string := '"' (single-line-string-body | newline multi-line-string-body newline unicode-space*) '"' single-line-string-body := (string-character - newline)* diff --git a/tests/test_cases/expected_kdl/unicode_equals_signs.kdl b/tests/test_cases/expected_kdl/unicode_equals_signs.kdl deleted file mode 100644 index 4ab6443..0000000 --- a/tests/test_cases/expected_kdl/unicode_equals_signs.kdl +++ /dev/null @@ -1 +0,0 @@ -node p1=val1 p2=val2 p3=val3 diff --git a/tests/test_cases/expected_kdl/unicode_silly.kdl b/tests/test_cases/expected_kdl/unicode_silly.kdl new file mode 100644 index 0000000..5fa566d --- /dev/null +++ b/tests/test_cases/expected_kdl/unicode_silly.kdl @@ -0,0 +1 @@ +ノード お名前=ฅ^•ﻌ•^ฅ diff --git a/tests/test_cases/input/unicode_equals_signs.kdl b/tests/test_cases/input/unicode_equals_signs.kdl deleted file mode 100644 index 37d8e02..0000000 --- a/tests/test_cases/input/unicode_equals_signs.kdl +++ /dev/null @@ -1,4 +0,0 @@ -node \ - p1﹦val1 \ // U+FE66 - p2＝val2 \ // U+FF1D - p3🟰val3 // U+1F7F0 diff --git a/tests/test_cases/input/unicode_silly.kd b/tests/test_cases/input/unicode_silly.kd new file mode 100644 index 0000000..5fa566d --- /dev/null +++ b/tests/test_cases/input/unicode_silly.kd @@ -0,0 +1 @@ +ノード お名前=ฅ^•ﻌ•^ฅ