@@ -15,6 +15,7 @@ define constant $ascii-8-bit-extensions
15
15
= as (<string> , vector (as (<character> , 128 ), '-' , as (<character> , 255 )));
16
16
17
17
// Build the state graph and save the initial state.
18
+ // Note that transition strings support ranges, like "A-Z".
18
19
//
19
20
define constant $initial-state :: <state>
20
21
= compile-state-machine
@@ -62,15 +63,15 @@ define constant $initial-state :: <state>
62
63
#('[' . # "sharp-bracket" ),
63
64
#('{' . # "sharp-brace" ),
64
65
#('#' . # "sharp-sharp" ),
65
- #('"' . # "sharp-quote" ),
66
+ #('"' . # "sharp-double- quote" ),
66
67
#(':' . # "sharp-colon" ),
67
68
#("bB" . # "sharp-b" ), // binary
68
69
#("oO" . # "sharp-o" ), // octal
69
70
#("xX" . # "sharp-x" ), // hex
70
71
#("tT" . # "true" ),
71
72
#("fF" . # "false" ),
72
73
#("nN" . # "sharp-n" ), // #next
73
- #("rR" . # "sharp-r" ), // #rest
74
+ #("rR" . # "sharp-r" ), // #rest, #r"...", #r"""..."""
74
75
#("kK" . # "sharp-k" ), // #key
75
76
#("aA" . # "sharp-a" ) // #all-keys
76
77
/* CMU
@@ -103,7 +104,9 @@ define constant $initial-state :: <state>
103
104
state(# "sharp-ne" , #f , #("xX" . # "sharp-nex" )),
104
105
state(# "sharp-nex" , #f , #("tT" . # "sharp-next" )),
105
106
state(# "sharp-next" , fragment-builder(<hash-next-fragment>)),
106
- state(# "sharp-r" , #f , #("eE" . # "sharp-re" )),
107
+ state(# "sharp-r" , #f ,
108
+ #("eE" . # "sharp-re" ),
109
+ #('"' . # "raw-string-start" )),
107
110
state(# "sharp-re" , #f , #("sS" . # "sharp-res" )),
108
111
state(# "sharp-res" , #f , #("tT" . # "sharp-rest" )),
109
112
state(# "sharp-rest" , fragment-builder(<hash-rest-fragment>)),
@@ -118,20 +121,47 @@ define constant $initial-state :: <state>
118
121
state(# "sharp-all-ke" , #f , #("yY" . # "sharp-all-key" )),
119
122
state(# "sharp-all-key" , #f , #("sS" . # "sharp-all-keys" )),
120
123
state(# "sharp-all-keys" , fragment-builder(<hash-all-keys-fragment>)),
121
- state(# "sharp-quote" , #f ,
122
- #('"' . # "quoted-keyword" ),
123
- #('\\' . # "sharp-quote-escape" ),
124
- #(" !#-[]-~" . # "sharp-quote" ),
125
- pair ($ascii-8 -bit-extensions, # "sharp-quote" )),
126
- state(# "sharp-quote-escape" , #f ,
127
- #("\\ abefnrt0\" " . # "sharp-quote" ),
128
- #('<' . # "sharp-quote-escape-less" )),
129
- state(# "sharp-quote-escape-less" , #f ,
130
- #("0-9a-fA-F" . # "sharp-quote-hex-char-digits" )),
131
- state(# "sharp-quote-hex-char-digits" , #f ,
132
- #("0-9a-fA-F" . # "sharp-quote-hex-char-digits" ),
133
- #('>' . # "sharp-quote" )),
134
- state(# "quoted-keyword" , make-quoted-symbol),
124
+
125
+ state(# "sharp-double-quote" , #f ,
126
+ #('"' . # "sharp-2-double-quotes" ),
127
+ #('\\' . # "quoted-symbol-escape" ),
128
+ #(" !#-[]-~" . # "quoted-symbol" ),
129
+ pair ($ascii-8 -bit-extensions, # "quoted-symbol" )),
130
+ state(# "sharp-2-double-quotes" , rcurry (make-quoted-symbol, 2 , 1 ),
131
+ #('"' . # "3quoted-symbol" )),
132
+ state(# "quoted-symbol" , #f ,
133
+ #('"' . # "quoted-symbol-end" ),
134
+ #('\\' . # "quoted-symbol-escape" ),
135
+ #(" !#-[]-~" . # "quoted-symbol" ),
136
+ pair ($ascii-8 -bit-extensions, # "quoted-symbol" )),
137
+ state(# "quoted-symbol-escape" , #f ,
138
+ #("\\ abefnrt0\" " . # "quoted-symbol" ),
139
+ #('<' . # "quoted-symbol-escape-less" )),
140
+ state(# "quoted-symbol-escape-less" , #f ,
141
+ #("0-9a-fA-F" . # "quoted-symbol-hex-digits" )),
142
+ state(# "quoted-symbol-hex-digits" , #f ,
143
+ #("0-9a-fA-F" . # "quoted-symbol-hex-digits" ),
144
+ #('>' . # "quoted-symbol" )),
145
+ state(# "quoted-symbol-end" , rcurry (make-quoted-symbol, 2 , 1 )),
146
+ state(# "3quoted-symbol" , #f ,
147
+ #('"' . # "3quoted-symbol-double-quote" ),
148
+ #("\r\n !#-[]-~" . # "3quoted-symbol" ),
149
+ #('\\' . # "3quoted-symbol-escape" )),
150
+ state(# "3quoted-symbol-escape" , #f ,
151
+ #("\\ abefnrt0\" " . # "3quoted-symbol" ),
152
+ #('<' . # "3quoted-symbol-escape-less" )),
153
+ state(# "3quoted-symbol-escape-less" , #f ,
154
+ #("0-9a-fA-F" . # "3quoted-symbol-hex-digits" )),
155
+ state(# "3quoted-symbol-hex-digits" , #f ,
156
+ #("0-9a-fA-F" . # "3quoted-symbol-hex-digits" ),
157
+ #('>' . # "3quoted-symbol" )),
158
+ state(# "3quoted-symbol-double-quote" , #f ,
159
+ #('"' . # "3quoted-symbol-2-double-quotes" ),
160
+ #("\r\n !#-[]-~" . # "3quoted-symbol" )),
161
+ state(# "3quoted-symbol-2-double-quotes" , #f ,
162
+ #('"' . # "3quoted-symbol-end" ),
163
+ #("\r\n !#-[]-~" . # "3quoted-symbol" )),
164
+ state(# "3quoted-symbol-end" , rcurry (make-quoted-symbol, 4 , 3 )),
135
165
136
166
state(# "sharp-b" , #f ,
137
167
#("01" . # "binary-integer" )),
@@ -477,15 +507,76 @@ define constant $initial-state :: <state>
477
507
#('\'' . # "character" )),
478
508
state(# "character" , make-character-literal),
479
509
state(# "double-quote" , #f ,
480
- #('"' . # "string" ),
481
- #('\\' . # "double-quote-escape" ),
482
- #(" !#-[]-~" . # "double-quote" ),
483
- pair ($ascii-8 -bit-extensions, # "double-quote" )),
484
- state(# "string" , make-string-literal),
485
- state(# "double-quote-escape" , #f ,
510
+ #('"' . # "two-double-quotes" ),
511
+ #('\\' . # "string-escape" ),
512
+ #(" !#-[]-~" . # "simple-string" ),
513
+ pair ($ascii-8 -bit-extensions, # "simple-string" )),
514
+ state(# "simple-string" , #f ,
515
+ #('"' . # "end-simple-string" ),
516
+ #('\\' . # "string-escape" ),
517
+ #(" !#-[]-~" . # "simple-string" ),
518
+ pair ($ascii-8 -bit-extensions, # "simple-string" )),
519
+ state(# "end-simple-string" , make-string-literal),
520
+ state(# "two-double-quotes" , make-string-literal,
521
+ #('"' . # "3string" )),
522
+
523
+ state(# "3string" , #f , // seen """
524
+ #('"' . # "close-double-quote" ),
525
+ #('\\' . # "3string-escape" ),
526
+ #(" !#-[]-~\r\n " . # "3string" ),
527
+ pair ($ascii-8 -bit-extensions, # "3string" )),
528
+ state(# "3string-escape" , #f ,
529
+ #("\\ '\" abefnrt0" . # "3string" ),
530
+ #('<' . # "3string-escape-less" )),
531
+ state(# "3string-escape-less" , #f ,
532
+ #("0-9a-fA-F" . # "3string-hex-char-digits" )),
533
+ state(# "3string-hex-char-digits" , #f ,
534
+ #("0-9a-fA-F" . # "3string-hex-char-digits" ),
535
+ #('>' . # "3string" )),
536
+ state(# "close-double-quote" , #f ,
537
+ #('"' . # "close-double-quote-2" ),
538
+ #(" !#-[]-~\r\n " . # "3string" ),
539
+ pair ($ascii-8 -bit-extensions, # "3string" )),
540
+ state(# "close-double-quote-2" , #f ,
541
+ #('"' . # "multi-line-string" ),
542
+ #(" !#-[]-~\r\n " . # "3string" ),
543
+ pair ($ascii-8 -bit-extensions, # "3string" )),
544
+ state(# "multi-line-string" , make-multi-line-string-literal),
545
+
546
+ // Raw strings
547
+ state(# "raw-string-start" , #f , // seen #r"
548
+ #('"' . # "sharp-r-2-double-quotes" ),
549
+ #(" !#-~" . # "raw-1string" ),
550
+ pair ($ascii-8 -bit-extensions, # "raw-1string" )),
551
+ state(# "sharp-r-2-double-quotes" , make-raw-string-literal,
552
+ #('"' . # "raw-3string-start" )),
553
+ state(# "raw-1string" , #f , // seen #r" plus one non-" char
554
+ #('"' . # "raw-1string-end" ),
555
+ #(" !#-~" . # "raw-1string" ),
556
+ pair ($ascii-8 -bit-extensions, # "raw-1string" )),
557
+ state(# "raw-1string-end" , make-raw-string-literal),
558
+ state(# "raw-3string-start" , #f , // seen #r"""
559
+ #('"' . # "raw-3string-double-quote" ),
560
+ #(" !#-~\r\n " . # "raw-3string" ),
561
+ pair ($ascii-8 -bit-extensions, # "raw-3string" )),
562
+ state(# "raw-3string" , #f ,
563
+ #('"' . # "raw-3string-double-quote" ),
564
+ #(" !#-~\r\n " . # "raw-3string" ),
565
+ pair ($ascii-8 -bit-extensions, # "raw-3string" )),
566
+ state(# "raw-3string-double-quote" , #f ,
567
+ #('"' . # "raw-3string-2-double-quotes" ),
568
+ #(" !#-~\r\n " . # "raw-3string" ),
569
+ pair ($ascii-8 -bit-extensions, # "raw-3string" )),
570
+ state(# "raw-3string-2-double-quotes" , #f ,
571
+ #('"' . # "raw-3string-end" ),
572
+ #(" !#-~\r\n " . # "raw-3string" ),
573
+ pair ($ascii-8 -bit-extensions, # "raw-3string" )),
574
+ state(# "raw-3string-end" , make-multi-line-raw-string-literal),
575
+
576
+ state(# "string-escape" , #f ,
486
577
#("\\ '\" abefnrt0" . # "double-quote" ),
487
- #('<' . # "double-quote -escape-less" )),
488
- state(# "double-quote -escape-less" , #f ,
578
+ #('<' . # "string -escape-less" )),
579
+ state(# "string -escape-less" , #f ,
489
580
#("0-9a-fA-F" . # "double-quote-hex-char-digits" )),
490
581
state(# "double-quote-hex-char-digits" , #f ,
491
582
#("0-9a-fA-F" . # "double-quote-hex-char-digits" ),
0 commit comments