55
66//! Utilities for parsing numbers in various formats
77
8- // spell-checker:ignore powf copysign prec inity infinit bigdecimal extendedbigdecimal biguint underflowed
8+ // spell-checker:ignore powf copysign prec inity infinit infs bigdecimal extendedbigdecimal biguint underflowed
99
1010use bigdecimal:: {
1111 BigDecimal , Context ,
@@ -35,7 +35,7 @@ enum Base {
3535
3636impl Base {
3737 /// Return the digit value of a character in the given base
38- pub fn digit ( & self , c : char ) -> Option < u64 > {
38+ fn digit ( & self , c : char ) -> Option < u64 > {
3939 fn from_decimal ( c : char ) -> u64 {
4040 u64:: from ( c) - u64:: from ( '0' )
4141 }
@@ -50,6 +50,34 @@ impl Base {
5050 } ,
5151 }
5252 }
53+
54+ /// Greedily parse as many digits as possible from the string
55+ /// Returns parsed digits (if any), and the rest of the string.
56+ fn parse_digits < ' a > ( & self , str : & ' a str ) -> ( Option < BigUint > , & ' a str ) {
57+ let ( digits, _, rest) = self . parse_digits_count ( str, None ) ;
58+ ( digits, rest)
59+ }
60+
61+ /// Greedily parse as many digits as possible from the string, adding to already parsed digits.
62+ /// This is meant to be used (directly) for the part after a decimal point.
63+ /// Returns parsed digits (if any), the number of parsed digits, and the rest of the string.
64+ fn parse_digits_count < ' a > (
65+ & self ,
66+ str : & ' a str ,
67+ digits : Option < BigUint > ,
68+ ) -> ( Option < BigUint > , u64 , & ' a str ) {
69+ let mut digits: Option < BigUint > = digits;
70+ let mut count: u64 = 0 ;
71+ let mut rest = str;
72+ while let Some ( d) = rest. chars ( ) . next ( ) . and_then ( |c| self . digit ( c) ) {
73+ ( digits, count) = (
74+ Some ( digits. unwrap_or_default ( ) * * self as u8 + d) ,
75+ count + 1 ,
76+ ) ;
77+ rest = & rest[ 1 ..] ;
78+ }
79+ ( digits, count, rest)
80+ }
5381}
5482
5583/// Type returned if a number could not be parsed in its entirety
@@ -235,10 +263,69 @@ impl ExtendedParser for ExtendedBigDecimal {
235263 }
236264}
237265
266+ fn parse_digits ( base : Base , str : & str , fractional : bool ) -> ( Option < BigUint > , u64 , & str ) {
267+ // Parse the integral part of the number
268+ let ( digits, rest) = base. parse_digits ( str) ;
269+
270+ // If allowed, parse the fractional part of the number if there can be one and the
271+ // input contains a '.' decimal separator.
272+ if fractional {
273+ if let Some ( rest) = rest. strip_prefix ( '.' ) {
274+ return base. parse_digits_count ( rest, digits) ;
275+ }
276+ }
277+
278+ ( digits, 0 , rest)
279+ }
280+
281+ fn parse_exponent ( base : Base , str : & str ) -> ( Option < BigInt > , & str ) {
282+ let exp_chars = match base {
283+ Base :: Decimal => [ 'e' , 'E' ] ,
284+ Base :: Hexadecimal => [ 'p' , 'P' ] ,
285+ _ => unreachable ! ( ) ,
286+ } ;
287+
288+ // Parse the exponent part, only decimal numbers are allowed.
289+ // We only update `rest` if an exponent is actually parsed.
290+ if let Some ( rest) = str. strip_prefix ( exp_chars) {
291+ let ( sign, rest) = if let Some ( rest) = rest. strip_prefix ( '-' ) {
292+ ( Sign :: Minus , rest)
293+ } else if let Some ( rest) = rest. strip_prefix ( '+' ) {
294+ ( Sign :: Plus , rest)
295+ } else {
296+ // Something else, or nothing at all: keep going.
297+ ( Sign :: Plus , rest) // No explicit sign is equivalent to `+`.
298+ } ;
299+
300+ let ( exp_uint, rest) = Base :: Decimal . parse_digits ( rest) ;
301+ if let Some ( exp_uint) = exp_uint {
302+ return ( Some ( BigInt :: from_biguint ( sign, exp_uint) ) , rest) ;
303+ }
304+ }
305+
306+ // Nothing parsed
307+ ( None , str)
308+ }
309+
/// Parse a multiplier from the allowed suffixes (e.g. s/m/h).
///
/// If the first character of `str` equals the character of an entry in
/// `allowed_suffixes`, returns that entry's multiplier (the first matching entry wins)
/// and the input with that one character removed. Otherwise returns `(1, str)` with the
/// input intact.
fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) -> (u32, &'a str) {
    let mut chars = str.chars();
    let matched = chars.next().and_then(|ch| {
        allowed_suffixes
            .iter()
            .find_map(|&(c, mul)| (c == ch).then_some(mul))
    });

    match matched {
        // `chars` has consumed exactly one character, so `as_str()` is the remainder
        // on a valid char boundary even when the suffix is a multi-byte character
        // (slicing at byte 1 would panic in that case).
        Some(mul) => (mul, chars.as_str()),
        // No suffix, just return 1 and the intact string.
        None => (1, str),
    }
}
324+
238325fn parse_special_value < ' a > (
239326 input : & ' a str ,
240327 negative : bool ,
241- allowed_suffixes : & ' a [ ( char , u32 ) ] ,
328+ allowed_suffixes : & [ ( char , u32 ) ] ,
242329) -> Result < ExtendedBigDecimal , ExtendedParserError < ' a , ExtendedBigDecimal > > {
243330 let input_lc = input. to_ascii_lowercase ( ) ;
244331
@@ -255,21 +342,14 @@ fn parse_special_value<'a>(
255342 if negative {
256343 special = -special;
257344 }
258- let mut match_len = str. len ( ) ;
259- if let Some ( ch) = input. chars ( ) . nth ( str. chars ( ) . count ( ) ) {
260- if allowed_suffixes. iter ( ) . any ( |( c, _) | ch == * c) {
261- // multiplying is unnecessary for these special values, but we have to note that
262- // we processed the character to avoid a partial match error
263- match_len += 1 ;
264- }
265- }
266- return if input. len ( ) == match_len {
345+
346+ // "infs" is a valid duration, so parse suffix multiplier in the original input string, but ignore the multiplier.
347+ let ( _, rest) = parse_suffix_multiplier ( & input[ str. len ( ) ..] , allowed_suffixes) ;
348+
349+ return if rest. is_empty ( ) {
267350 Ok ( special)
268351 } else {
269- Err ( ExtendedParserError :: PartialMatch (
270- special,
271- & input[ match_len..] ,
272- ) )
352+ Err ( ExtendedParserError :: PartialMatch ( special, rest) )
273353 } ;
274354 }
275355 }
@@ -396,13 +476,10 @@ pub(crate) enum ParseTarget {
396476 Duration ,
397477}
398478
399- // TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps
400- // around between integer and float parsing, and should be split in multiple parts.
401- #[ allow( clippy:: cognitive_complexity) ]
402479pub ( crate ) fn parse < ' a > (
403480 input : & ' a str ,
404481 target : ParseTarget ,
405- allowed_suffixes : & ' a [ ( char , u32 ) ] ,
482+ allowed_suffixes : & [ ( char , u32 ) ] ,
406483) -> Result < ExtendedBigDecimal , ExtendedParserError < ' a , ExtendedBigDecimal > > {
407484 // Parse the " and ' prefixes separately
408485 if target != ParseTarget :: Duration {
@@ -451,78 +528,30 @@ pub(crate) fn parse<'a>(
451528 ( Base :: Decimal , unsigned)
452529 } ;
453530
454- // Parse the integral part of the number
455- let mut chars = rest. chars ( ) . enumerate ( ) . fuse ( ) . peekable ( ) ;
456- let mut digits: Option < BigUint > = None ;
457- let mut scale = 0u64 ;
458- let mut exponent: Option < BigInt > = None ;
459- while let Some ( d) = chars. peek ( ) . and_then ( |& ( _, c) | base. digit ( c) ) {
460- chars. next ( ) ;
461- digits = Some ( digits. unwrap_or_default ( ) * base as u8 + d) ;
462- }
463-
464- // Parse fractional/exponent part of the number for supported bases.
465- if matches ! ( base, Base :: Decimal | Base :: Hexadecimal ) && target != ParseTarget :: Integral {
466- // Parse the fractional part of the number if there can be one and the input contains
467- // a '.' decimal separator.
468- if matches ! ( chars. peek( ) , Some ( & ( _, '.' ) ) ) {
469- chars. next ( ) ;
470- while let Some ( d) = chars. peek ( ) . and_then ( |& ( _, c) | base. digit ( c) ) {
471- chars. next ( ) ;
472- ( digits, scale) = ( Some ( digits. unwrap_or_default ( ) * base as u8 + d) , scale + 1 ) ;
473- }
474- }
531+ // We only parse fractional and exponent part of the number in base 10/16 floating point numbers.
532+ let parse_frac_exp =
533+ matches ! ( base, Base :: Decimal | Base :: Hexadecimal ) && target != ParseTarget :: Integral ;
475534
476- let exp_char = match base {
477- Base :: Decimal => 'e' ,
478- Base :: Hexadecimal => 'p' ,
479- _ => unreachable ! ( ) ,
480- } ;
535+ // Parse the integral and fractional (if supported) part of the number
536+ let ( digits, scale, rest) = parse_digits ( base, rest, parse_frac_exp) ;
481537
482- // Parse the exponent part, only decimal numbers are allowed.
483- if chars
484- . peek ( )
485- . is_some_and ( |& ( _, c) | c. to_ascii_lowercase ( ) == exp_char)
486- {
487- // Save the iterator position in case we do not parse any exponent.
488- let save_chars = chars. clone ( ) ;
489- chars. next ( ) ;
490- let exp_negative = match chars. peek ( ) {
491- Some ( ( _, '-' ) ) => {
492- chars. next ( ) ;
493- true
494- }
495- Some ( ( _, '+' ) ) => {
496- chars. next ( ) ;
497- false
498- }
499- _ => false , // Something else, or nothing at all: keep going.
500- } ;
501- while let Some ( d) = chars. peek ( ) . and_then ( |& ( _, c) | Base :: Decimal . digit ( c) ) {
502- chars. next ( ) ;
503- exponent = Some ( exponent. unwrap_or_default ( ) * 10 + d as i64 ) ;
504- }
505- if let Some ( exp) = & exponent {
506- if exp_negative {
507- exponent = Some ( -exp) ;
508- }
509- } else {
510- // No exponent actually parsed, reset iterator to return partial match.
511- chars = save_chars;
512- }
513- }
514- }
538+ // Parse exponent part of the number for supported bases.
539+ let ( exponent, rest) = if parse_frac_exp {
540+ parse_exponent ( base, rest)
541+ } else {
542+ ( None , rest)
543+ } ;
515544
516545 // If no digit has been parsed, check if this is a special value, or declare the parsing unsuccessful
517546 if digits. is_none ( ) {
518547 // If we trimmed an initial `0x`/`0b`, return a partial match.
519- if rest ! = unsigned {
548+ if let Some ( partial ) = unsigned. strip_prefix ( "0" ) {
520549 let ebd = if negative {
521550 ExtendedBigDecimal :: MinusZero
522551 } else {
523552 ExtendedBigDecimal :: zero ( )
524553 } ;
525- return Err ( ExtendedParserError :: PartialMatch ( ebd, & unsigned [ 1 .. ] ) ) ;
554+ return Err ( ExtendedParserError :: PartialMatch ( ebd, partial ) ) ;
526555 }
527556
528557 return if target == ParseTarget :: Integral {
@@ -532,28 +561,19 @@ pub(crate) fn parse<'a>(
532561 } ;
533562 }
534563
535- let mut digits = digits . unwrap ( ) ;
564+ let ( mul , rest ) = parse_suffix_multiplier ( rest , allowed_suffixes ) ;
536565
537- if let Some ( ( _, ch) ) = chars. peek ( ) {
538- if let Some ( times) = allowed_suffixes
539- . iter ( )
540- . find ( |( c, _) | ch == c)
541- . map ( |& ( _, t) | t)
542- {
543- chars. next ( ) ;
544- digits *= times;
545- }
546- }
566+ let digits = digits. unwrap ( ) * mul;
547567
548568 let ebd_result =
549569 construct_extended_big_decimal ( digits, negative, base, scale, exponent. unwrap_or_default ( ) ) ;
550570
551571 // Return what has been parsed so far. If there are extra characters, mark the
552572 // parsing as a partial match.
553- if let Some ( ( first_unparsed , _ ) ) = chars . next ( ) {
573+ if !rest . is_empty ( ) {
554574 Err ( ExtendedParserError :: PartialMatch (
555575 ebd_result. unwrap_or_else ( |e| e. extract ( ) ) ,
556- & rest[ first_unparsed.. ] ,
576+ rest,
557577 ) )
558578 } else {
559579 ebd_result
0 commit comments