1
1
use crate :: lazy:: encoding:: TextEncoding ;
2
2
use crate :: lazy:: raw_stream_item:: RawStreamItem ;
3
3
use crate :: lazy:: text:: encoded_value:: EncodedTextValue ;
4
- use crate :: lazy:: text:: matched:: { MatchedInt , MatchedValue } ;
4
+ use crate :: lazy:: text:: matched:: { MatchedFloat , MatchedInt , MatchedValue } ;
5
5
use crate :: lazy:: text:: parse_result:: IonParseError ;
6
6
use crate :: lazy:: text:: parse_result:: { IonMatchResult , IonParseResult } ;
7
7
use crate :: lazy:: text:: value:: LazyRawTextValue ;
@@ -12,7 +12,7 @@ use nom::character::streaming::{char, digit1, one_of};
12
12
use nom:: combinator:: { map, opt, peek, recognize, success, value} ;
13
13
use nom:: error:: { ErrorKind , ParseError } ;
14
14
use nom:: multi:: many0_count;
15
- use nom:: sequence:: { delimited, pair, preceded, separated_pair, terminated} ;
15
+ use nom:: sequence:: { delimited, pair, preceded, separated_pair, terminated, tuple } ;
16
16
use nom:: { CompareResult , IResult , InputLength , InputTake , Needed , Parser } ;
17
17
use std:: fmt:: { Debug , Formatter } ;
18
18
use std:: iter:: { Copied , Enumerate } ;
@@ -192,6 +192,12 @@ impl<'data> TextBufferView<'data> {
192
192
EncodedTextValue :: new ( MatchedValue :: Int ( matched_int) , self . offset ( ) , length)
193
193
} ,
194
194
) ,
195
+ map (
196
+ match_and_length ( Self :: match_float) ,
197
+ |( matched_float, length) | {
198
+ EncodedTextValue :: new ( MatchedValue :: Float ( matched_float) , self . offset ( ) , length)
199
+ } ,
200
+ ) ,
195
201
// TODO: The other Ion types
196
202
) )
197
203
. map ( |encoded_value| LazyRawTextValue {
@@ -372,6 +378,111 @@ impl<'data> TextBufferView<'data> {
372
378
fn take_base_16_digits1 ( self ) -> IonMatchResult < ' data > {
373
379
take_while1 ( |b : u8 | b. is_ascii_hexdigit ( ) ) ( self )
374
380
}
381
+
382
+ /// Matches an Ion float of any syntax
383
+ fn match_float ( self ) -> IonParseResult < ' data , MatchedFloat > {
384
+ alt ( (
385
+ Self :: match_float_special_value,
386
+ Self :: match_float_numeric_value,
387
+ ) ) ( self )
388
+ }
389
+
390
+ /// Matches special IEEE-754 floating point values, including +/- infinity and NaN.
391
+ fn match_float_special_value ( self ) -> IonParseResult < ' data , MatchedFloat > {
392
+ alt ( (
393
+ value ( MatchedFloat :: NotANumber , tag ( "nan" ) ) ,
394
+ value ( MatchedFloat :: PositiveInfinity , tag ( "+inf" ) ) ,
395
+ value ( MatchedFloat :: NegativeInfinity , tag ( "-inf" ) ) ,
396
+ ) ) ( self )
397
+ }
398
+
399
+ /// Matches numeric IEEE-754 floating point values.
400
+ fn match_float_numeric_value ( self ) -> IonParseResult < ' data , MatchedFloat > {
401
+ terminated (
402
+ recognize ( pair (
403
+ Self :: match_number_with_optional_dot_and_digits,
404
+ Self :: match_float_exponent_marker_and_digits,
405
+ ) ) ,
406
+ Self :: peek_stop_character,
407
+ )
408
+ . map ( |_matched| MatchedFloat :: Numeric )
409
+ . parse ( self )
410
+ }
411
+
412
+ /// Matches a number that may or may not have a decimal place and trailing fractional digits.
413
+ /// If a decimal place is present, there must also be trailing digits.
414
+ /// For example:
415
+ /// 1000
416
+ /// 1000.559
417
+ /// -25.2
418
+ fn match_number_with_optional_dot_and_digits ( self ) -> IonMatchResult < ' data > {
419
+ recognize ( tuple ( (
420
+ opt ( tag ( "-" ) ) ,
421
+ Self :: match_base_10_digits_before_dot,
422
+ opt ( Self :: match_dot_followed_by_base_10_digits) ,
423
+ ) ) ) ( self )
424
+ }
425
+
426
+ /// In a float or decimal, matches the digits that are permitted before the decimal point.
427
+ /// This includes either a single zero, or a non-zero followed by any sequence of digits.
428
+ fn match_digits_before_dot ( self ) -> IonMatchResult < ' data > {
429
+ alt ( (
430
+ tag ( "0" ) ,
431
+ recognize ( pair ( Self :: match_leading_digit, Self :: match_trailing_digits) ) ,
432
+ ) ) ( self )
433
+ }
434
+
435
+ /// Matches a single non-zero base 10 digit.
436
+ fn match_leading_digit ( self ) -> IonMatchResult < ' data > {
437
+ recognize ( one_of ( "123456789" ) ) ( self )
438
+ }
439
+
440
+ /// Matches any number of base 10 digits, allowing underscores at any position except the end.
441
+ fn match_trailing_digits ( self ) -> IonMatchResult < ' data > {
442
+ recognize ( many0_count ( preceded ( opt ( char ( '_' ) ) , digit1) ) ) ( self )
443
+ }
444
+
445
+ /// Recognizes a decimal point followed by any number of base-10 digits.
446
+ fn match_dot_followed_by_base_10_digits ( self ) -> IonMatchResult < ' data > {
447
+ recognize ( preceded ( tag ( "." ) , opt ( Self :: match_digits_after_dot) ) ) ( self )
448
+ }
449
+
450
+ /// Like `match_digits_before_dot`, but allows leading zeros.
451
+ fn match_digits_after_dot ( self ) -> IonMatchResult < ' data > {
452
+ recognize ( terminated (
453
+ // Zero or more digits-followed-by-underscores
454
+ many0_count ( pair ( digit1, char ( '_' ) ) ) ,
455
+ // One or more digits
456
+ digit1,
457
+ ) ) ( self )
458
+ }
459
+
460
+ /// Matches an `e` or `E` followed by an optional sign (`+` or `-`) followed by one or more
461
+ /// base 10 digits.
462
+ fn match_float_exponent_marker_and_digits ( self ) -> IonMatchResult < ' data > {
463
+ preceded ( one_of ( "eE" ) , Self :: match_exponent_sign_and_digits) ( self )
464
+ }
465
+
466
+ /// Recognizes the exponent portion of a decimal (everything after the 'd') or float
467
+ /// (everything after the 'e'). This includes:
468
+ /// * an optional '+' OR '-'
469
+ /// * any number of decimal digits, which may:
470
+ /// * have underscores in between them: `1_000_000`
471
+ /// * have one or more leading zeros: `0005`
472
+ fn match_exponent_sign_and_digits ( self ) -> IonMatchResult < ' data > {
473
+ recognize ( pair (
474
+ // Optional leading sign; if there's no sign, it's not negative.
475
+ opt ( Self :: match_any_sign) ,
476
+ Self :: match_digits_after_dot,
477
+ ) ) ( self )
478
+ }
479
+
480
+ /// Matches `-` OR `+`.
481
+ ///
482
+ /// This is used for matching exponent signs; most places in Ion do not allow `+`.
483
+ pub fn match_any_sign ( self ) -> IonMatchResult < ' data > {
484
+ alt ( ( tag ( "+" ) , tag ( "-" ) ) ) ( self )
485
+ }
375
486
}
376
487
377
488
// === nom trait implementations ===
@@ -602,7 +713,12 @@ mod tests {
602
713
{
603
714
let result = self . try_match ( parser) ;
604
715
// We expect this to fail for one reason or another
605
- result. unwrap_err ( ) ;
716
+ assert ! (
717
+ result. is_err( ) ,
718
+ "Expected a parse failure for input: {:?}\n Result: {:?}" ,
719
+ self . input,
720
+ result
721
+ ) ;
606
722
}
607
723
}
608
724
@@ -729,4 +845,38 @@ mod tests {
729
845
mismatch_int ( input) ;
730
846
}
731
847
}
848
+
849
#[test]
fn test_match_float() {
    // Asserts that `input` is accepted by `TextBufferView::match_float`.
    fn match_float(input: &str) {
        MatchTest::new(input).expect_match(match_length(TextBufferView::match_float));
    }
    // Asserts that `input` is rejected by `TextBufferView::match_float`.
    fn mismatch_float(input: &str) {
        MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_float));
    }

    // The special values carry their own sign (or none), so they are checked as written
    // instead of going through the negation loop used for numeric inputs.
    let special_values = &["nan", "+inf", "-inf"];
    for input in special_values {
        match_float(input);
    }

    let good_inputs = &[
        "0.0e0", "0E0", "0e0", "305e1", "305e+1", "305e-1", "305e100", "305e-100", "305e+100",
        "305.0e1", "0.279e3", "279e0", "279.5e0", "279.5E0",
    ];
    for input in good_inputs {
        match_float(input);
        // Every valid numeric float must also be valid with a leading minus sign.
        let negative = format!("-{input}");
        match_float(&negative);
    }

    let bad_inputs = &[
        "305",      // Integer
        "305e",     // Has exponent delimiter but no exponent
        ".305e",    // No digits before the decimal point
        "305e0.5",  // Fractional exponent
        "305e-0.5", // Negative fractional exponent
        "0305e1",   // Leading zero
        "+305e1",   // Leading plus sign
        "--305e1",  // Multiple negative signs
        "inf",      // Infinity without a sign
        "+nan",     // NaN does not take a sign
        "-nan",     // NaN does not take a sign
    ];
    for input in bad_inputs {
        mismatch_float(input);
    }
}
732
882
}
0 commit comments