Skip to content

Commit ff45085

Browse files
authored
Merge pull request #1487 from cgay/dep12
Implement DEP 12, string literal syntax
2 parents 8ed7dfc + ab1aa60 commit ff45085

File tree

9 files changed

+628
-276
lines changed

9 files changed

+628
-276
lines changed

documentation/library-reference/source/language-extensions/index.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ convenient "dialect" of Dylan, exported from the module ``common-dylan``:
4444
use common-extensions, export: all;
4545
end module;
4646
47-
This section describes the common language extensions, that is,
48-
extensions made to the Dylan library as it is defined in DRM. These
49-
extensions are available to applications in the ``dylan`` library's
47+
This section describes Dylan language extensions, that is, extensions made to
48+
the Dylan library, and to the base language syntax, as they are defined in the
49+
DRM. These extensions are available to applications in the ``dylan`` library's
5050
``dylan`` module.
5151

5252
.. toctree::
@@ -63,5 +63,6 @@ extensions are available to applications in the ``dylan`` library's
6363
parser-expansions
6464
alternative-curry-syntax
6565
numeric-literals
66+
string-literals
6667

6768
All the other language extensions are described in :doc:`../common-dylan/index`.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
String Literal Syntax
2+
=====================
3+
4+
`Dylan Enhancement Proposal (DEP) 12
5+
<https://opendylan.org/proposals/dep-0012-string-literals.html>`_ extends the
6+
literal syntax for strings to include strings without any escape character
7+
processing ("raw" strings) and multi-line strings. Briefly:
8+
9+
#. Multi-line strings begin with three double-quote characters: ``"""``
10+
11+
#. End-of-line sequences in multi-line strings are always parsed as a single
12+
Newline (``\n``) character, regardless of source file line endings or the
13+
conventions of the operating system.
14+
15+
Any string, whether delimited by ``"`` or ``"""``, may be prefixed with
16+
``#r`` or ``#R`` to disable escape sequence processing.
17+
18+
See `DEP 12 <https://opendylan.org/proposals/dep-0012-string-literals.html>`_
19+
for details.

documentation/release-notes/source/2023.1.rst

+4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ commit logs
2020
Compiler
2121
========
2222

23+
* `Dylan Enhancement Proposal (DEP) 12
24+
<https://opendylan.org/proposals/dep-0012-string-literals.html>`_ was
25+
implemented, adding multi-line and "raw" string literals.
26+
2327
Tooling
2428
=======
2529

sources/dfmc/reader/lexer-transitions.dylan

+116-25
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ define constant $ascii-8-bit-extensions
1515
= as(<string>, vector(as(<character>, 128), '-', as(<character>, 255)));
1616

1717
// Build the state graph and save the initial state.
18+
// Note that transition strings support ranges, like "A-Z".
1819
//
1920
define constant $initial-state :: <state>
2021
= compile-state-machine
@@ -62,15 +63,15 @@ define constant $initial-state :: <state>
6263
#('[' . #"sharp-bracket"),
6364
#('{' . #"sharp-brace"),
6465
#('#' . #"sharp-sharp"),
65-
#('"' . #"sharp-quote"),
66+
#('"' . #"sharp-double-quote"),
6667
#(':' . #"sharp-colon"),
6768
#("bB" . #"sharp-b"), // binary
6869
#("oO" . #"sharp-o"), // octal
6970
#("xX" . #"sharp-x"), // hex
7071
#("tT" . #"true"),
7172
#("fF" . #"false"),
7273
#("nN" . #"sharp-n"), // #next
73-
#("rR" . #"sharp-r"), // #rest
74+
#("rR" . #"sharp-r"), // #rest, #r"...", #r"""..."""
7475
#("kK" . #"sharp-k"), // #key
7576
#("aA" . #"sharp-a") // #all-keys
7677
/* CMU
@@ -103,7 +104,9 @@ define constant $initial-state :: <state>
103104
state(#"sharp-ne", #f, #("xX" . #"sharp-nex")),
104105
state(#"sharp-nex", #f, #("tT" . #"sharp-next")),
105106
state(#"sharp-next", fragment-builder(<hash-next-fragment>)),
106-
state(#"sharp-r", #f, #("eE" . #"sharp-re")),
107+
state(#"sharp-r", #f,
108+
#("eE" . #"sharp-re"),
109+
#('"' . #"raw-string-start")),
107110
state(#"sharp-re", #f, #("sS" . #"sharp-res")),
108111
state(#"sharp-res", #f, #("tT" . #"sharp-rest")),
109112
state(#"sharp-rest", fragment-builder(<hash-rest-fragment>)),
@@ -118,20 +121,47 @@ define constant $initial-state :: <state>
118121
state(#"sharp-all-ke", #f, #("yY" . #"sharp-all-key")),
119122
state(#"sharp-all-key", #f, #("sS" . #"sharp-all-keys")),
120123
state(#"sharp-all-keys", fragment-builder(<hash-all-keys-fragment>)),
121-
state(#"sharp-quote", #f,
122-
#('"' . #"quoted-keyword"),
123-
#('\\' . #"sharp-quote-escape"),
124-
#(" !#-[]-~" . #"sharp-quote"),
125-
pair($ascii-8-bit-extensions, #"sharp-quote")),
126-
state(#"sharp-quote-escape", #f,
127-
#("\\abefnrt0\"" . #"sharp-quote"),
128-
#('<' . #"sharp-quote-escape-less")),
129-
state(#"sharp-quote-escape-less", #f,
130-
#("0-9a-fA-F" . #"sharp-quote-hex-char-digits")),
131-
state(#"sharp-quote-hex-char-digits", #f,
132-
#("0-9a-fA-F" . #"sharp-quote-hex-char-digits"),
133-
#('>' . #"sharp-quote")),
134-
state(#"quoted-keyword", make-quoted-symbol),
124+
125+
state(#"sharp-double-quote", #f,
126+
#('"' . #"sharp-2-double-quotes"),
127+
#('\\' . #"quoted-symbol-escape"),
128+
#(" !#-[]-~" . #"quoted-symbol"),
129+
pair($ascii-8-bit-extensions, #"quoted-symbol")),
130+
state(#"sharp-2-double-quotes", rcurry(make-quoted-symbol, 2, 1),
131+
#('"' . #"3quoted-symbol")),
132+
state(#"quoted-symbol", #f,
133+
#('"' . #"quoted-symbol-end"),
134+
#('\\' . #"quoted-symbol-escape"),
135+
#(" !#-[]-~" . #"quoted-symbol"),
136+
pair($ascii-8-bit-extensions, #"quoted-symbol")),
137+
state(#"quoted-symbol-escape", #f,
138+
#("\\abefnrt0\"" . #"quoted-symbol"),
139+
#('<' . #"quoted-symbol-escape-less")),
140+
state(#"quoted-symbol-escape-less", #f,
141+
#("0-9a-fA-F" . #"quoted-symbol-hex-digits")),
142+
state(#"quoted-symbol-hex-digits", #f,
143+
#("0-9a-fA-F" . #"quoted-symbol-hex-digits"),
144+
#('>' . #"quoted-symbol")),
145+
state(#"quoted-symbol-end", rcurry(make-quoted-symbol, 2, 1)),
146+
state(#"3quoted-symbol", #f,
147+
#('"' . #"3quoted-symbol-double-quote"),
148+
#("\r\n !#-[]-~" . #"3quoted-symbol"),
149+
#('\\' . #"3quoted-symbol-escape")),
150+
state(#"3quoted-symbol-escape", #f,
151+
#("\\abefnrt0\"" . #"3quoted-symbol"),
152+
#('<' . #"3quoted-symbol-escape-less")),
153+
state(#"3quoted-symbol-escape-less", #f,
154+
#("0-9a-fA-F" . #"3quoted-symbol-hex-digits")),
155+
state(#"3quoted-symbol-hex-digits", #f,
156+
#("0-9a-fA-F" . #"3quoted-symbol-hex-digits"),
157+
#('>' . #"3quoted-symbol")),
158+
state(#"3quoted-symbol-double-quote", #f,
159+
#('"' . #"3quoted-symbol-2-double-quotes"),
160+
#("\r\n !#-[]-~" . #"3quoted-symbol")),
161+
state(#"3quoted-symbol-2-double-quotes", #f,
162+
#('"' . #"3quoted-symbol-end"),
163+
#("\r\n !#-[]-~" . #"3quoted-symbol")),
164+
state(#"3quoted-symbol-end", rcurry(make-quoted-symbol, 4, 3)),
135165

136166
state(#"sharp-b", #f,
137167
#("01" . #"binary-integer")),
@@ -477,15 +507,76 @@ define constant $initial-state :: <state>
477507
#('\'' . #"character")),
478508
state(#"character", make-character-literal),
479509
state(#"double-quote", #f,
480-
#('"' . #"string"),
481-
#('\\' . #"double-quote-escape"),
482-
#(" !#-[]-~" . #"double-quote"),
483-
pair($ascii-8-bit-extensions, #"double-quote")),
484-
state(#"string", make-string-literal),
485-
state(#"double-quote-escape", #f,
510+
#('"' . #"two-double-quotes"),
511+
#('\\' . #"string-escape"),
512+
#(" !#-[]-~" . #"simple-string"),
513+
pair($ascii-8-bit-extensions, #"simple-string")),
514+
state(#"simple-string", #f,
515+
#('"' . #"end-simple-string"),
516+
#('\\' . #"string-escape"),
517+
#(" !#-[]-~" . #"simple-string"),
518+
pair($ascii-8-bit-extensions, #"simple-string")),
519+
state(#"end-simple-string", make-string-literal),
520+
state(#"two-double-quotes", make-string-literal,
521+
#('"' . #"3string")),
522+
523+
state(#"3string", #f, // seen """
524+
#('"' . #"close-double-quote"),
525+
#('\\' . #"3string-escape"),
526+
#(" !#-[]-~\r\n" . #"3string"),
527+
pair($ascii-8-bit-extensions, #"3string")),
528+
state(#"3string-escape", #f,
529+
#("\\'\"abefnrt0" . #"3string"),
530+
#('<' . #"3string-escape-less")),
531+
state(#"3string-escape-less", #f,
532+
#("0-9a-fA-F" . #"3string-hex-char-digits")),
533+
state(#"3string-hex-char-digits", #f,
534+
#("0-9a-fA-F" . #"3string-hex-char-digits"),
535+
#('>' . #"3string")),
536+
state(#"close-double-quote", #f,
537+
#('"' . #"close-double-quote-2"),
538+
#(" !#-[]-~\r\n" . #"3string"),
539+
pair($ascii-8-bit-extensions, #"3string")),
540+
state(#"close-double-quote-2", #f,
541+
#('"' . #"multi-line-string"),
542+
#(" !#-[]-~\r\n" . #"3string"),
543+
pair($ascii-8-bit-extensions, #"3string")),
544+
state(#"multi-line-string", make-multi-line-string-literal),
545+
546+
// Raw strings
547+
state(#"raw-string-start", #f, // seen #r"
548+
#('"' . #"sharp-r-2-double-quotes"),
549+
#(" !#-~" . #"raw-1string"),
550+
pair($ascii-8-bit-extensions, #"raw-1string")),
551+
state(#"sharp-r-2-double-quotes", make-raw-string-literal,
552+
#('"' . #"raw-3string-start")),
553+
state(#"raw-1string", #f, // seen #r" plus one non-" char
554+
#('"' . #"raw-1string-end"),
555+
#(" !#-~" . #"raw-1string"),
556+
pair($ascii-8-bit-extensions, #"raw-1string")),
557+
state(#"raw-1string-end", make-raw-string-literal),
558+
state(#"raw-3string-start", #f, // seen #r"""
559+
#('"' . #"raw-3string-double-quote"),
560+
#(" !#-~\r\n" . #"raw-3string"),
561+
pair($ascii-8-bit-extensions, #"raw-3string")),
562+
state(#"raw-3string", #f,
563+
#('"' . #"raw-3string-double-quote"),
564+
#(" !#-~\r\n" . #"raw-3string"),
565+
pair($ascii-8-bit-extensions, #"raw-3string")),
566+
state(#"raw-3string-double-quote", #f,
567+
#('"' . #"raw-3string-2-double-quotes"),
568+
#(" !#-~\r\n" . #"raw-3string"),
569+
pair($ascii-8-bit-extensions, #"raw-3string")),
570+
state(#"raw-3string-2-double-quotes", #f,
571+
#('"' . #"raw-3string-end"),
572+
#(" !#-~\r\n" . #"raw-3string"),
573+
pair($ascii-8-bit-extensions, #"raw-3string")),
574+
state(#"raw-3string-end", make-multi-line-raw-string-literal),
575+
576+
state(#"string-escape", #f,
486577
#("\\'\"abefnrt0" . #"double-quote"),
487-
#('<' . #"double-quote-escape-less")),
488-
state(#"double-quote-escape-less", #f,
578+
#('<' . #"string-escape-less")),
579+
state(#"string-escape-less", #f,
489580
#("0-9a-fA-F" . #"double-quote-hex-char-digits")),
490581
state(#"double-quote-hex-char-digits", #f,
491582
#("0-9a-fA-F" . #"double-quote-hex-char-digits"),

0 commit comments

Comments
 (0)