-
Notifications
You must be signed in to change notification settings - Fork 789
/
lex.fsl
1932 lines (1567 loc) · 77.3 KB
/
lex.fsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft Corporation. All Rights Reserved. See License.txt in the project root for license information.
{
//------------------------------------------------------------------------
// The Lexer. Some of the complication arises from the fact it is
// reused by the Visual Studio mode to do partial lexing reporting
// whitespace etc.
//-----------------------------------------------------------------------
open System
open System.Globalization
open System.Text
open Internal.Utilities.Library
open Internal.Utilities.Library.Extras
open FSharp.Compiler
open FSharp.Compiler.DiagnosticsLogger
open FSharp.Compiler.Features
open FSharp.Compiler.IO
open FSharp.Compiler.Syntax
open FSharp.Compiler.Text.Range
module Ranges =
/// Whether valid as signed int8 when a minus sign is prepended, compares true to 0x80
let isInt8BadMax x = 1 <<< 7 = x
/// Whether valid as signed int16 when a minus sign is prepended, compares true to 0x8000
let isInt16BadMax x = 1 <<< 15 = x
/// Whether valid as signed int32 when a minus sign is prepended, compares as string against "2147483648".
let isInt32BadMax = let max = string(1UL <<< 31) in fun s -> max = s
/// Whether valid as signed int64 when a minus sign is prepended, compares as string against "9223372036854775808".
let isInt64BadMax = let max = string(1UL <<< 63) in fun s -> max = s
/// Get string from lexbuf
let lexeme (lexbuf : UnicodeLexing.Lexbuf) = UnicodeLexing.Lexbuf.LexemeString lexbuf
/// Trim n chars from both sides of lexbuf, return string
let lexemeTrimBoth (lexbuf : UnicodeLexing.Lexbuf) (n:int) (m:int) =
let s = lexbuf.LexemeView
s.Slice(n, s.Length - (n+m)).ToString()
/// Trim n chars from the right of lexbuf, return string
let lexemeTrimRight lexbuf n = lexemeTrimBoth lexbuf 0 n
/// Trim n chars from the left of lexbuf, return string
let lexemeTrimLeft lexbuf n = lexemeTrimBoth lexbuf n 0
/// Throw a lexing error with a message
let fail args (lexbuf:UnicodeLexing.Lexbuf) msg dflt =
let m = lexbuf.LexemeRange
args.diagnosticsLogger.ErrorR(Error(msg,m))
dflt
//--------------------------
// Integer parsing
// Parsing integers is common in bootstrap runs (parsing
// the parser tables, no doubt). So this is an optimized
// version of the F# core library parsing code with the call to "Trim"
// removed, which appears in profiling runs as a small but significant cost.
let getSign32 (s:string) (p:byref<int>) l =
if (l >= p + 1 && s.[p] = '-')
then p <- p + 1; -1
else 1
let isOXB c =
let c = Char.ToLowerInvariant c
c = 'x' || c = 'o' || c = 'b'
let is0OXB (s:string) p l =
l >= p + 2 && s.[p] = '0' && isOXB s.[p+1]
let get0OXB (s:string) (p:byref<int>) l =
if is0OXB s p l
then let r = Char.ToLowerInvariant s.[p+1] in p <- p + 2; r
else 'd'
let parseBinaryUInt64 (s:string) =
Convert.ToUInt64(s, 2)
let parseOctalUInt64 (s:string) =
Convert.ToUInt64(s, 8)
let removeUnderscores (s:string) =
s.Replace("_", "")
let parseInt32 (s:string) =
let s = removeUnderscores s
let l = s.Length
let mutable p = 0
let sign = getSign32 s &p l
let specifier = get0OXB s &p l
match Char.ToLower(specifier,CultureInfo.InvariantCulture) with
| 'x' -> sign * (int32 (Convert.ToUInt32(UInt64.Parse(s.Substring(p), NumberStyles.AllowHexSpecifier,CultureInfo.InvariantCulture))))
| 'b' -> sign * (int32 (Convert.ToUInt32(parseBinaryUInt64 (s.Substring(p)))))
| 'o' -> sign * (int32 (Convert.ToUInt32(parseOctalUInt64 (s.Substring(p)))))
| _ -> Int32.Parse(s, NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture)
let lexemeTrimRightToInt32 args lexbuf n =
try parseInt32 (lexemeTrimRight lexbuf n)
with _ -> fail args lexbuf (FSComp.SR.lexOutsideIntegerRange()) 0
//--------------------------
// Checks
let checkExprOp (lexbuf:UnicodeLexing.Lexbuf) =
if lexbuf.LexemeContains ':' then
deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames(":")) lexbuf.LexemeRange
if lexbuf.LexemeContains '$' then
deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames("$")) lexbuf.LexemeRange
let checkExprGreaterColonOp (lexbuf:UnicodeLexing.Lexbuf) =
if lexbuf.LexemeContains '$' then
deprecatedWithError (FSComp.SR.lexCharNotAllowedInOperatorNames("$")) lexbuf.LexemeRange
let unexpectedChar lexbuf =
LEX_FAILURE (FSComp.SR.lexUnexpectedChar(lexeme lexbuf))
let startString args (lexbuf: UnicodeLexing.Lexbuf) =
let buf = ByteBuffer.Create StringCapacity
let m = lexbuf.LexemeRange
let startp = lexbuf.StartPos
let fin =
LexerStringFinisher (fun buf kind context cont ->
// Adjust the start-of-token mark back to the true start of the token
lexbuf.StartPos <- startp
let isPart = context.HasFlag(LexerStringFinisherContext.InterpolatedPart)
let isVerbatim = context.HasFlag(LexerStringFinisherContext.Verbatim)
let isTripleQuote = context.HasFlag(LexerStringFinisherContext.TripleQuote)
if kind.IsByteString then
let synByteStringKind = if isVerbatim then SynByteStringKind.Verbatim else SynByteStringKind.Regular
if kind.IsInterpolated then
fail args lexbuf (FSComp.SR.lexByteStringMayNotBeInterpolated()) ()
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
else
match Lexhelp.errorsInByteStringBuffer buf with
| Some (largerThanOneByte, largerThan127) ->
if largerThanOneByte > 0 then
fail args lexbuf (FSComp.SR.lexByteArrayCannotEncode(largerThanOneByte)) ()
if largerThan127 > 0 then
warning (Error(FSComp.SR.lexByteArrayOutisdeAscii(largerThan127), lexbuf.LexemeRange))
| None -> ()
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
elif kind.IsInterpolated then
let s = Lexhelp.stringBufferAsString buf
if kind.IsInterpolatedFirst then
let synStringKind =
if isTripleQuote then
SynStringKind.TripleQuote
elif isVerbatim then
SynStringKind.Verbatim
else
SynStringKind.Regular
if isPart then
INTERP_STRING_BEGIN_PART (s, synStringKind, cont)
else
INTERP_STRING_BEGIN_END (s, synStringKind, cont)
else
if isPart then
INTERP_STRING_PART (s, cont)
else
INTERP_STRING_END (s, cont)
else
let s = Lexhelp.stringBufferAsString buf
let synStringKind =
if isVerbatim then
SynStringKind.Verbatim
elif isTripleQuote then
SynStringKind.TripleQuote
else
SynStringKind.Regular
STRING (s, synStringKind, cont))
buf,fin,m
// Utility functions for processing XML documentation
let trySaveXmlDoc (lexbuf: LexBuffer<char>) (buff: (range * StringBuilder) option) =
match buff with
| None -> ()
| Some (start, sb) ->
let xmlCommentLineRange = mkFileIndexRange start.FileIndex start.Start (posOfLexPosition lexbuf.StartPos)
LexbufLocalXmlDocStore.SaveXmlDocLine (lexbuf, sb.ToString(), xmlCommentLineRange)
let tryAppendXmlDoc (buff: (range * StringBuilder) option) (s:string) =
match buff with
| None -> ()
| Some (_, sb) -> ignore(sb.Append s)
// Utilities for parsing #if/#else/#endif
let shouldStartLine args lexbuf (m:range) err tok =
if (m.StartColumn <> 0) then fail args lexbuf err tok
else tok
let shouldStartFile args lexbuf (m:range) err tok =
if (m.StartColumn <> 0 || m.StartLine <> 1) then fail args lexbuf err tok
else tok
let evalIfDefExpression startPos reportLibraryOnlyFeatures langVersion strictIndentation args (lookup: string -> bool) (lexed: string) =
let lexbuf = LexBuffer<char>.FromChars (reportLibraryOnlyFeatures, langVersion, strictIndentation, lexed.ToCharArray ())
lexbuf.StartPos <- startPos
lexbuf.EndPos <- startPos
let tokenStream = FSharp.Compiler.PPLexer.tokenstream args
let expr = FSharp.Compiler.PPParser.start tokenStream lexbuf
(LexerIfdefEval lookup expr), expr
let evalFloat args lexbuf =
try
float32(removeUnderscores (lexemeTrimRight lexbuf 1))
with _ ->
fail args lexbuf (FSComp.SR.lexInvalidFloat()) 0.0f
}
let letter = '\Lu' | '\Ll' | '\Lt' | '\Lm' | '\Lo' | '\Nl'
let surrogateChar = '\Cs'
let digit = '\Nd'
let hex = ['0'-'9'] | ['A'-'F'] | ['a'-'f']
let truewhite = [' ']
let offwhite = ['\t']
let anywhite = truewhite | offwhite
let anychar = [^'\n''\r']
let anystring = anychar*
let op_char = '!'|'$'|'%'|'&'|'*'|'+'|'-'|'.'|'/'|'<'|'='|'>'|'?'|'@'|'^'|'|'|'~'|':'
let ignored_op_char = '.' | '$' | '?'
let separator = '_'
let xinteger =
( '0' ('x'| 'X') hex ((hex | separator)* hex)?
| '0' ('o'| 'O') (['0'-'7']) (((['0'-'7']) | separator)* (['0'-'7']))?
| '0' ('b'| 'B') (['0'-'1']) (((['0'-'1']) | separator)* (['0'-'1']))?)
let integer = digit ((digit | separator)* digit)?
let int8 = integer 'y'
let uint8 = (xinteger | integer) 'u' 'y'
let int16 = integer 's'
let uint16 = (xinteger | integer) 'u' 's'
let int = integer
let int32 = integer 'l'
let uint32 = (xinteger | integer) 'u'
let uint32l = (xinteger | integer) 'u' 'l'
let nativeint = (xinteger | integer) 'n'
let unativeint = (xinteger | integer) 'u' 'n'
let int64 = (xinteger | integer) 'L'
let uint64 = (xinteger | integer) ('u' | 'U') 'L'
let xint8 = xinteger 'y'
let xint16 = xinteger 's'
let xint = xinteger
let xint32 = xinteger 'l'
let floatp = digit ((digit | separator)* digit)? '.' (digit ((digit | separator)* digit)?)?
let floate = digit ((digit | separator)* digit)? ('.' (digit ((digit | separator)* digit)?)? )? ('e'| 'E') ['+' '-']? digit ((digit | separator)* digit)?
let float = floatp | floate
let bignum = integer ('I' | 'N' | 'Z' | 'Q' | 'R' | 'G')
let ieee64 = float
let ieee32 = float ('f' | 'F')
let ieee32_dotless_no_exponent = integer ('f' | 'F')
let decimal = (float | integer) ('m' | 'M')
let xieee32 = xinteger 'l' 'f'
let xieee64 = xinteger 'L' 'F'
let escape_char = ('\\' ( '\\' | "\"" | '\'' | 'a' | 'f' | 'v' | 'n' | 't' | 'b' | 'r'))
let char = '\'' ( [^'\\''\n''\r''\t''\b'] | escape_char) '\''
let trigraph = '\\' digit digit digit
let hexGraphShort = '\\' 'x' hex hex
let unicodeGraphShort = '\\' 'u' hex hex hex hex
let unicodeGraphLong = '\\' 'U' hex hex hex hex hex hex hex hex
let newline = ('\n' | '\r' '\n')
let connecting_char = '\Pc'
let combining_char = '\Mn' | '\Mc'
let formatting_char = '\Cf'
let ident_start_char =
letter | '_'
let ident_char =
letter
| connecting_char
| combining_char
| formatting_char
| digit
| ['\'']
let ident = ident_start_char ident_char*
rule token (args: LexArgs) (skip: bool) = parse
| ident
{ Keywords.KeywordOrIdentifierToken args lexbuf (lexeme lexbuf) }
| "do!"
{ DO_BANG }
| "yield!"
{ YIELD_BANG(true) }
| "return!"
{ YIELD_BANG(false) }
| "match!"
{ MATCH_BANG }
| "and!"
{ AND_BANG(false) }
| "while!"
{ WHILE_BANG }
| ident '!'
{ let tok = Keywords.KeywordOrIdentifierToken args lexbuf (lexemeTrimRight lexbuf 1)
match tok with
| LET _ -> BINDER (lexemeTrimRight lexbuf 1)
| _ -> fail args lexbuf (FSComp.SR.lexIdentEndInMarkReserved("!")) (Keywords.KeywordOrIdentifierToken args lexbuf (lexeme lexbuf)) }
| ident ('#')
{ fail args lexbuf (FSComp.SR.lexIdentEndInMarkReserved("#")) (Keywords.KeywordOrIdentifierToken args lexbuf (lexeme lexbuf)) }
| int8
{ let n = lexemeTrimRightToInt32 args lexbuf 1
// Allow <max_int+1> to parse as min_int. Allowed only because we parse '-' as an operator.
if Ranges.isInt8BadMax n then INT8(SByte.MinValue, true (* 'true' = 'bad'*) )
else if n > int SByte.MaxValue || n < int SByte.MinValue then fail args lexbuf (FSComp.SR.lexOutsideEightBitSigned()) (INT8(0y, false))
else INT8(sbyte n, false) }
| xint8
{ let n = lexemeTrimRightToInt32 args lexbuf 1
if n > int Byte.MaxValue || n < 0 then fail args lexbuf (FSComp.SR.lexOutsideEightBitSignedHex()) (INT8(0y, false))
else INT8(sbyte(byte(n)), false) }
| uint8
{ let n = lexemeTrimRightToInt32 args lexbuf 2
if n > int Byte.MaxValue || n < 0 then fail args lexbuf (FSComp.SR.lexOutsideEightBitUnsigned()) (UINT8(0uy))
else UINT8(byte n) }
| int16
{ let n = lexemeTrimRightToInt32 args lexbuf 1
// Allow <max_int+1> to parse as min_int. Allowed only because we parse '-' as an operator.
if Ranges.isInt16BadMax n then INT16(Int16.MinValue, true (* 'true' = 'bad'*) )
else if n > int Int16.MaxValue || n < int Int16.MinValue then fail args lexbuf (FSComp.SR.lexOutsideSixteenBitSigned()) (INT16(0s, false))
else INT16(int16 n, false) }
| xint16
{ let n = lexemeTrimRightToInt32 args lexbuf 1
if n > int UInt16.MaxValue || n < 0 then fail args lexbuf (FSComp.SR.lexOutsideSixteenBitSigned()) (INT16(0s,false))
else INT16(int16(uint16(n)), false) }
| uint16
{ let n = lexemeTrimRightToInt32 args lexbuf 2
if n > int UInt16.MaxValue || n < 0 then fail args lexbuf (FSComp.SR.lexOutsideSixteenBitUnsigned()) (UINT16(0us))
else UINT16(uint16 n) }
| int '.' '.'
{ let s = removeUnderscores (lexemeTrimRight lexbuf 2)
// Allow <max_int+1> to parse as min_int. Allowed only because we parse '-' as an operator.
if Ranges.isInt32BadMax s then INT32_DOT_DOT(Int32.MinValue, true (* 'true' = 'bad'*) ) else
let n = try int32 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitSigned()) 0
INT32_DOT_DOT(n, false)
}
| xint
| int
{ let s = removeUnderscores (lexeme lexbuf)
// Allow <max_int+1> to parse as min_int. Allowed only because we parse '-' as an operator.
if Ranges.isInt32BadMax s then INT32(Int32.MinValue, true (* 'true' = 'bad'*) ) else
let n =
try int32 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitSigned()) 0
INT32(n, false)
}
| xint32
| int32
{ let s = removeUnderscores (lexemeTrimRight lexbuf 1)
// Allow <max_int+1> to parse as min_int. Allowed only because we parse '-' as an operator.
if Ranges.isInt32BadMax s then INT32(Int32.MinValue, true (* 'true' = 'bad'*) ) else
let n =
try int32 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitSigned()) 0
INT32(n, false)
}
| uint32
{
let s = removeUnderscores (lexemeTrimRight lexbuf 1)
let n =
try int64 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitUnsigned()) 0L
if n > int64 UInt32.MaxValue || n < 0L then fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitUnsigned()) (UINT32(0u)) else
UINT32(uint32 (uint64 n)) }
| uint32l
{
let s = removeUnderscores (lexemeTrimRight lexbuf 2)
let n =
try int64 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitUnsigned()) 0L
if n > int64 UInt32.MaxValue || n < 0L then fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitUnsigned()) (UINT32(0u)) else
UINT32(uint32 (uint64 n)) }
| int64
{ let s = removeUnderscores (lexemeTrimRight lexbuf 1)
// Allow <max_int+1> to parse as min_int. Stupid but allowed because we parse '-' as an operator.
if Ranges.isInt64BadMax s then INT64(Int64.MinValue, true (* 'true' = 'bad'*) ) else
let n =
try int64 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideSixtyFourBitSigned()) 0L
INT64(n,false)
}
| uint64
{ let s = removeUnderscores (lexemeTrimRight lexbuf 2)
let n =
try uint64 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideSixtyFourBitUnsigned()) 0UL
UINT64(n) }
| nativeint
{ let s = removeUnderscores (lexemeTrimRight lexbuf 1)
// Allow <max_nativeint+1> to parse as min_nativeint. Stupid but allowed because we parse '-' as an operator.
if Ranges.isInt64BadMax s then NATIVEINT(Int64.MinValue, true) else
let n =
try int64 s with _ -> fail args lexbuf (FSComp.SR.lexOutsideNativeSigned()) 0L
NATIVEINT(n,false)
}
| unativeint
{ try
UNATIVEINT(uint64 (removeUnderscores (lexemeTrimRight lexbuf 2)))
with _ -> fail args lexbuf (FSComp.SR.lexOutsideNativeUnsigned()) (UNATIVEINT(0UL)) }
| ieee32
{ IEEE32 (evalFloat args lexbuf) }
| ieee32_dotless_no_exponent
{ if lexbuf.SupportsFeature LanguageFeature.DotlessFloat32Literal then
IEEE32 (evalFloat args lexbuf)
else
fail args lexbuf (FSComp.SR.lexInvalidFloat()) (IEEE32 0.0f)
}
| ieee64
{ IEEE64 (try float(lexeme lexbuf) with _ -> fail args lexbuf (FSComp.SR.lexInvalidFloat()) 0.0) }
| decimal
{ try
let s = removeUnderscores (lexemeTrimRight lexbuf 1)
// This implements a range check for decimal literals
let d = System.Decimal.Parse(s,System.Globalization.NumberStyles.AllowExponent ||| System.Globalization.NumberStyles.Number,System.Globalization.CultureInfo.InvariantCulture)
DECIMAL d
with
e -> fail args lexbuf (FSComp.SR.lexOutsideDecimal()) (DECIMAL (decimal 0))
}
| xieee32
{
let s = removeUnderscores (lexemeTrimRight lexbuf 2)
// Even though the intermediate step is an int64, display the "invalid float" message, since it will be less confusing to the user
let n64 = (try (int64 s) with _ -> fail args lexbuf (FSComp.SR.lexInvalidFloat()) 0L)
if n64 > 0xFFFFFFFFL || n64 < 0L then fail args lexbuf (FSComp.SR.lexOutsideThirtyTwoBitFloat()) (IEEE32 0.0f) else
IEEE32 (System.BitConverter.ToSingle(System.BitConverter.GetBytes(int32 (uint32 (uint64 n64))),0)) }
| xieee64
{
let n64 = (try int64 (removeUnderscores (lexemeTrimRight lexbuf 2)) with _ -> fail args lexbuf (FSComp.SR.lexInvalidFloat()) 0L)
IEEE64 (System.BitConverter.Int64BitsToDouble(n64)) }
| bignum
{ let s = lexeme lexbuf
BIGNUM (removeUnderscores (lexemeTrimRight lexbuf 1), s.[s.Length-1..s.Length-1]) }
| (int | xint | float) ident_char+
{ fail args lexbuf (FSComp.SR.lexInvalidNumericLiteral()) (INT32(0,false)) }
| char
{ let s = lexeme lexbuf
CHAR (if s.[1] = '\\' then escape s.[2] else s.[1]) }
| char 'B'
{ let s = lexeme lexbuf
let x = int32 (if s.[1] = '\\' then escape s.[2] else s.[1])
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x)) }
| '\'' trigraph '\''
{ let s = lexeme lexbuf
let c = trigraph s.[2] s.[3] s.[4]
let x = int32 c
if x < 0 || x > 255 then
fail args lexbuf (FSComp.SR.lexInvalidCharLiteral()) (CHAR c)
else
CHAR c }
| '\'' trigraph '\'' 'B'
{ let s = lexeme lexbuf
let x = int32 (trigraph s.[2] s.[3] s.[4])
if x < 0 || x > 255 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
elif x > 127 then
// TODO: Promote to Error:
// * Adjust range check in `if` above to `x > 127`
// * Remove this `elif` expression
// * Remove `lexInvalidTrigraphAsciiByteLiteral` from `FSComp.txt`
warning (Error(FSComp.SR.lexInvalidTrigraphAsciiByteLiteral(), lexbuf.LexemeRange))
UINT8 (byte(x))
else
UINT8 (byte(x)) }
| '\'' unicodeGraphShort '\'' { CHAR (char (int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 1)))) }
| '\'' unicodeGraphShort '\'' 'B'
{ let x = int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 2))
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x)) }
| '\'' hexGraphShort '\'' { CHAR (char (int32 (hexGraphShort (lexemeTrimBoth lexbuf 3 1)))) }
| '\'' hexGraphShort '\'' 'B'
{ let x = int32 (hexGraphShort (lexemeTrimBoth lexbuf 3 2))
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x)) }
| '\'' unicodeGraphLong '\''
{ match unicodeGraphLong (lexemeTrimBoth lexbuf 3 1) with
| SingleChar(c) -> CHAR (char c)
| _ -> fail args lexbuf (FSComp.SR.lexThisUnicodeOnlyInStringLiterals()) (CHAR (char 0)) }
| '\'' unicodeGraphLong '\'' 'B'
{ match unicodeGraphLong (lexemeTrimBoth lexbuf 3 2) with
| SingleChar(c) ->
let x = int32 c
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x))
| _ -> fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0)) }
| "(*IF-FSHARP"
{ if lexbuf.SupportsFeature LanguageFeature.MLCompatRevisions then
mlCompatWarning (FSComp.SR.lexIndentOffForML()) lexbuf.LexemeRange
if not skip then COMMENT (LexCont.Token (args.ifdefStack, args.stringNest))
else token args skip lexbuf }
| "(*F#"
{ if not skip then COMMENT (LexCont.Token (args.ifdefStack, args.stringNest))
else token args skip lexbuf }
| "ENDIF-FSHARP*)"
{ if not skip then COMMENT (LexCont.Token (args.ifdefStack, args.stringNest))
else token args skip lexbuf }
| "F#*)"
{ if not skip then COMMENT (LexCont.Token (args.ifdefStack, args.stringNest))
else token args skip lexbuf }
| "(*)"
{ LPAREN_STAR_RPAREN }
| "(*"
{ let m = lexbuf.LexemeRange
if not skip then COMMENT (LexCont.Comment(args.ifdefStack, args.stringNest, 1, m))
else comment (1,m,args) skip lexbuf }
| "(*IF-CAML*)" | "(*IF-OCAML*)"
{ let m = lexbuf.LexemeRange
if not skip then COMMENT (LexCont.MLOnly(args.ifdefStack, args.stringNest, m))
else mlOnly m args skip lexbuf }
| '"'
{ let buf, fin, m = startString args lexbuf
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| (_, LexerStringStyle.ExtendedInterpolated, _, _, _) :: _
| (_, LexerStringStyle.TripleQuote, _, _, _) :: _ -> ()
| _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m))
| [] -> ()
if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, LexerStringKind.String, args.interpolationDelimiterLength, m))
else singleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf }
| '$' '"' '"' '"'
{ let buf, fin, m = startString args lexbuf
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| _ :: _ -> errorR(Error(FSComp.SR.lexTripleQuoteInTripleQuote(), m))
| [] -> ()
args.interpolationDelimiterLength <- 1
if not skip then STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, LexerStringKind.InterpolatedStringFirst, 1, m))
else tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf }
| ('$'+) '"' '"' '"'
{ let buf, fin, m = startString args lexbuf
if lexbuf.SupportsFeature LanguageFeature.ExtendedStringInterpolation then
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| _ :: _ -> errorR(Error(FSComp.SR.lexTripleQuoteInTripleQuote(), m))
| [] -> ()
args.interpolationDelimiterLength <- lexeme lexbuf |> Seq.takeWhile (fun c -> c = '$') |> Seq.length
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.ExtendedInterpolated, LexerStringKind.InterpolatedStringFirst, args.interpolationDelimiterLength, m))
else
extendedInterpolatedString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
else
let result =
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, LexerStringKind.InterpolatedStringFirst, args.interpolationDelimiterLength, m))
else
tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf
fail args lexbuf (FSComp.SR.lexExtendedStringInterpolationNotSupported()) result
}
| '$' '"'
{ let buf,fin,m = startString args lexbuf
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| (_, style, _, _, _) :: _ when style = LexerStringStyle.ExtendedInterpolated || style = LexerStringStyle.TripleQuote -> ()
| _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m))
| _ -> ()
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, LexerStringKind.InterpolatedStringFirst, args.interpolationDelimiterLength, m))
else
singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf }
| '"' '"' '"'
{ let buf, fin, m = startString args lexbuf
args.interpolationDelimiterLength <- 0
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| _ :: _ -> errorR(Error(FSComp.SR.lexTripleQuoteInTripleQuote(), m))
| _ -> ()
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.TripleQuote, LexerStringKind.String, args.interpolationDelimiterLength, m))
else
tripleQuoteString (buf, fin, m, LexerStringKind.String, args) skip lexbuf }
| '@' '"'
{ let buf, fin, m = startString args lexbuf
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| (_, LexerStringStyle.ExtendedInterpolated, _, _, _) :: _
| (_, LexerStringStyle.TripleQuote, _, _, _) :: _ -> ()
| _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m))
| _ -> ()
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, LexerStringKind.String, args.interpolationDelimiterLength, m))
else
verbatimString (buf, fin, m, LexerStringKind.String, args) skip lexbuf }
| ("$@" | "@$") '"'
{ let buf, fin, m = startString args lexbuf
// Single quote in triple quote ok, others disallowed
match args.stringNest with
| (_, style, _, _, _) :: _ when style = LexerStringStyle.ExtendedInterpolated || style = LexerStringStyle.TripleQuote -> ()
| _ :: _ -> errorR(Error(FSComp.SR.lexSingleQuoteInSingleQuote(), m))
| _ -> ()
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.Verbatim, LexerStringKind.InterpolatedStringFirst, args.interpolationDelimiterLength, m))
else
verbatimString (buf, fin, m, LexerStringKind.InterpolatedStringFirst, args) skip lexbuf }
| truewhite+
{ if skip then token args skip lexbuf
else WHITESPACE (LexCont.Token(args.ifdefStack, args.stringNest)) }
| offwhite+
{ if args.indentationSyntaxStatus.Status then errorR(Error(FSComp.SR.lexTabsNotAllowed(), lexbuf.LexemeRange))
if not skip then WHITESPACE (LexCont.Token(args.ifdefStack, args.stringNest))
else token args skip lexbuf }
| "////" op_char*
{ // 4+ slash are 1-line comments, online 3 slash are XmlDoc
let m = lexbuf.LexemeRange
LexbufLocalXmlDocStore.AddGrabPointDelayed(lexbuf)
if not skip then LINE_COMMENT (LexCont.SingleLineComment(args.ifdefStack, args.stringNest, 1, m))
else singleLineComment (None,1,m,m,args) skip lexbuf }
| "///" op_char*
{ // Match exactly 3 slash, 4+ slash caught by preceding rule
let m = lexbuf.LexemeRange
let doc = lexemeTrimLeft lexbuf 3
let sb = (new StringBuilder(100)).Append(doc)
if not skip then LINE_COMMENT (LexCont.SingleLineComment(args.ifdefStack, args.stringNest, 1, m))
else singleLineComment (Some (m, sb),1,m,m,args) skip lexbuf }
| "//" op_char*
{ // Need to read all operator symbols too, otherwise it might be parsed by a rule below
let m = lexbuf.LexemeRange
LexbufLocalXmlDocStore.AddGrabPointDelayed(lexbuf)
if not skip then LINE_COMMENT (LexCont.SingleLineComment(args.ifdefStack, args.stringNest, 1, m))
else singleLineComment (None,1,m,m,args) skip lexbuf }
| newline
{ newline lexbuf
if not skip then WHITESPACE (LexCont.Token(args.ifdefStack, args.stringNest))
else token args skip lexbuf }
| '`' '`' ([^'`' '\n' '\r' '\t'] | '`' [^'`''\n' '\r' '\t'])+ '`' '`'
{ Keywords.IdentifierToken args lexbuf (lexemeTrimBoth lexbuf 2 2) }
| '`' '`' (([^'`' '\n' '\r' '\t'] | ('`' [^'`' '\n' '\r' '\t']))+)'`'
{ errorR(Error(FSComp.SR.lexInvalidIdentifier(), lexbuf.LexemeRange))
Keywords.IdentifierToken args lexbuf (lexemeTrimBoth lexbuf 2 1) }
| '`' '`' (([^'`' '\n' '\r' '\t'] | ('`'[^'`' '\n' '\r' '\t']))+)
{ errorR(Error(FSComp.SR.lexInvalidIdentifier(), lexbuf.LexemeRange))
Keywords.IdentifierToken args lexbuf (lexemeTrimLeft lexbuf 2) }
| "````" | "```" | "``" | "`"
{ errorR(Error(FSComp.SR.lexInvalidIdentifier(), lexbuf.LexemeRange))
Keywords.IdentifierToken args lexbuf "" }
| ('#' anywhite* | "#line" anywhite+ ) digit+ anywhite* ('@'? "\"" [^'\n''\r''"']+ '"')? anywhite* newline
{ let pos = lexbuf.EndPos
if skip then
let s = lexeme lexbuf
let rec parseLeadingDirective n =
match s.[n] with
| c when c >= 'a' && c <= 'z' -> parseLeadingDirective (n+1)
| _ -> parseLeadingWhitespace n // goto the next state
and parseLeadingWhitespace n =
match s.[n] with
| ' ' | '\t' -> parseLeadingWhitespace (n+1)
| _ -> parseLineNumber n n // goto the next state
and parseLineNumber start n =
match s.[n] with
| c when c >= '0' && c <= '9' -> parseLineNumber start (n+1)
| _ ->
let text = String.sub s start (n-start)
let lineNumber =
try
int32 text
with err ->
errorR(Error(FSComp.SR.lexInvalidLineNumber(text), lexbuf.LexemeRange))
0
lineNumber, parseWhitespaceBeforeFile n // goto the next state
and parseWhitespaceBeforeFile n =
match s.[n] with
| ' ' | '\t' | '@' -> parseWhitespaceBeforeFile (n+1)
| '"' -> Some (parseFile (n+1) (n+1))
| _ -> None
and parseFile start n =
match s.[n] with
| '"' -> String.sub s start (n-start)
| _ -> parseFile start (n+1)
// Call the parser
let line, file = parseLeadingDirective 1
// Construct the new position
if args.applyLineDirectives then
lexbuf.EndPos <- pos.ApplyLineDirective((match file with Some f -> FileIndex.fileIndexOfFile f | None -> pos.FileIndex), line)
else
// add a newline when we don't apply a directive since we consumed a newline getting here
newline lexbuf
token args skip lexbuf
else
// add a newline when we don't apply a directive since we consumed a newline getting here
newline lexbuf
HASH_LINE (LexCont.Token (args.ifdefStack, args.stringNest))
}
| "<@" { checkExprOp lexbuf; LQUOTE ("<@ @>", false) }
| "<@@" { checkExprOp lexbuf; LQUOTE ("<@@ @@>", true) }
| "@>" { checkExprOp lexbuf; RQUOTE ("<@ @>", false) }
| "@@>" { checkExprOp lexbuf; RQUOTE ("<@@ @@>", true) }
| '#' { HASH }
| '&' { AMP }
| "&&" { AMP_AMP }
| "||" { BAR_BAR }
| '\'' { QUOTE }
| '(' { LPAREN }
| ')' { RPAREN }
| '*' { STAR }
| ',' { COMMA }
| "->" { RARROW }
| "?" { QMARK }
| "??" { QMARK_QMARK }
| ".." { DOT_DOT }
| "..^" { DOT_DOT_HAT }
| "." { DOT }
| ":" { COLON }
| "::" { COLON_COLON }
| ":>" { COLON_GREATER }
| "@>." { RQUOTE_DOT ("<@ @>",false) }
| "@@>." { RQUOTE_DOT ("<@@ @@>",true) }
| ">|]" { GREATER_BAR_RBRACK }
| ":?>" { COLON_QMARK_GREATER }
| ":?" { COLON_QMARK }
| ":=" { COLON_EQUALS }
| ";;" { SEMICOLON_SEMICOLON }
| ";" { SEMICOLON }
| "<-" { LARROW }
| "=" { EQUALS }
| "[" { LBRACK }
| "[|" { LBRACK_BAR }
| "{|" { LBRACE_BAR }
| "<" { LESS false }
| ">" { GREATER false }
| "[<" { LBRACK_LESS }
| "]" { RBRACK }
| "|]" { BAR_RBRACK }
| "|}" { BAR_RBRACE }
| ">]" { GREATER_RBRACK }
| "{"
{
match args.stringNest with
| [] -> ()
| (counter, style, d, _, m) :: rest ->
// Note, we do not update the 'm', any incomplete-interpolation error
// will be reported w.r.t. the first '{'
args.stringNest <- (counter + 1, style, d, None, m) :: rest
// To continue token-by-token lexing may involve picking up the new args.stringNes
let cont = LexCont.Token(args.ifdefStack, args.stringNest)
LBRACE cont
}
| "|" { BAR }
| "}"
{
// We encounter a '}' in the expression token stream. First check if we're in an interpolated string expression
// and continue the string if necessary
match args.stringNest with
| (1, LexerStringStyle.ExtendedInterpolated, delimLength, altR, r) :: rest when delimLength > 1 ->
// On the first "}" of multiple "}", keep the range of the starting "}" for later processing in startString
let altStart =
match altR with
| None -> Some lexbuf.LexemeRange
| _ -> altR
args.stringNest <- (1, LexerStringStyle.ExtendedInterpolated, delimLength - 1, altStart, r) :: rest
token args skip lexbuf
| (1, style, _, altR, _r) :: rest ->
args.stringNest <- rest
altR
|> Option.iter (fun r ->
let n = r.StartColumn - lexbuf.StartPos.Column
lexbuf.StartPos <- lexbuf.StartPos.ShiftColumnBy(n))
let buf, fin, m = startString args lexbuf
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, style, LexerStringKind.InterpolatedStringPart, args.interpolationDelimiterLength, m))
else
match style with
| LexerStringStyle.Verbatim -> verbatimString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf
| LexerStringStyle.SingleQuote -> singleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf
| LexerStringStyle.TripleQuote -> tripleQuoteString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf
| LexerStringStyle.ExtendedInterpolated -> extendedInterpolatedString (buf, fin, m, LexerStringKind.InterpolatedStringPart, args) skip lexbuf
| (counter, style, d, altR, m) :: rest ->
// Note, we do not update the 'm', any incomplete-interpolation error
// will be reported w.r.t. the first '{'
args.stringNest <- (counter - 1, style, d, altR, m) :: rest
let cont = LexCont.Token(args.ifdefStack, args.stringNest)
RBRACE cont
| _ ->
let cont = LexCont.Token(args.ifdefStack, args.stringNest)
RBRACE cont
}
| "$" { DOLLAR }
| "%" { PERCENT_OP("%") }
| "%%" { PERCENT_OP("%%") }
| "-" { MINUS }
| "~"
{ errorR (Error(FSComp.SR.lexInvalidIdentifier(), lexbuf.LexemeRange))
RESERVED }
| ignored_op_char* '*' '*' op_char* { checkExprOp lexbuf; INFIX_STAR_STAR_OP(lexeme lexbuf) }
| ignored_op_char* ('*' | '/'|'%') op_char* { checkExprOp lexbuf; INFIX_STAR_DIV_MOD_OP(lexeme lexbuf) }
| ignored_op_char* ('+'|'-') op_char* { checkExprOp lexbuf; PLUS_MINUS_OP(lexeme lexbuf) }
| ignored_op_char* ('@'|'^') op_char* { checkExprOp lexbuf; INFIX_AT_HAT_OP(lexeme lexbuf) }
| ignored_op_char* ('=' | "!=" | '<' | '$') op_char* { checkExprOp lexbuf; INFIX_COMPARE_OP(lexeme lexbuf) }
| ignored_op_char* ('>') op_char* { checkExprGreaterColonOp lexbuf; INFIX_COMPARE_OP(lexeme lexbuf) }
| ignored_op_char* ('&') op_char* { checkExprOp lexbuf; INFIX_AMP_OP(lexeme lexbuf) }
| ignored_op_char* '|' op_char* { checkExprOp lexbuf; INFIX_BAR_OP(lexeme lexbuf) }
| ignored_op_char* ('!' | '~' ) op_char* { checkExprOp lexbuf; PREFIX_OP(lexeme lexbuf) }
| ".[]" | ".[]<-" | ".[,]<-" | ".[,,]<-" | ".[,,,]<-" | ".[,,,]" | ".[,,]" | ".[,]" | ".[..]" | ".[..,..]" | ".[..,..,..]" | ".[..,..,..,..]"