@@ -268,7 +268,7 @@ macro_rules! impl_Display {
268
268
// Format per two digits from the lookup table.
269
269
if remain > 9 {
270
270
// SAFETY: All of the decimals fit in buf due to MAX_DEC_N
271
- // and the while condition ensures at least 2 more decimals.
271
+ // and the if condition ensures at least 2 more decimals.
272
272
unsafe { core:: hint:: assert_unchecked( offset >= 2 ) }
273
273
// SAFETY: The offset counts down from its initial buf.len()
274
274
// without underflow due to the previous precondition.
@@ -555,93 +555,6 @@ mod imp {
555
555
}
556
556
impl_Exp ! ( i128 , u128 as u128 via to_u128 named exp_u128) ;
557
557
558
- /// Helper function for writing a u64 into `buf` going from last to first, with `curr`.
559
- fn parse_u64_into < const N : usize > ( mut n : u64 , buf : & mut [ MaybeUninit < u8 > ; N ] , curr : & mut usize ) {
560
- let buf_ptr = MaybeUninit :: slice_as_mut_ptr ( buf) ;
561
- let lut_ptr = DEC_DIGITS_LUT . as_ptr ( ) ;
562
- assert ! ( * curr > 19 ) ;
563
-
564
- // SAFETY:
565
- // Writes at most 19 characters into the buffer. Guaranteed that any ptr into LUT is at most
566
- // 198, so will never OOB. There is a check above that there are at least 19 characters
567
- // remaining.
568
- unsafe {
569
- if n >= 1e16 as u64 {
570
- let to_parse = n % 1e16 as u64 ;
571
- n /= 1e16 as u64 ;
572
-
573
- // Some of these are nops but it looks more elegant this way.
574
- let d1 = ( ( to_parse / 1e14 as u64 ) % 100 ) << 1 ;
575
- let d2 = ( ( to_parse / 1e12 as u64 ) % 100 ) << 1 ;
576
- let d3 = ( ( to_parse / 1e10 as u64 ) % 100 ) << 1 ;
577
- let d4 = ( ( to_parse / 1e8 as u64 ) % 100 ) << 1 ;
578
- let d5 = ( ( to_parse / 1e6 as u64 ) % 100 ) << 1 ;
579
- let d6 = ( ( to_parse / 1e4 as u64 ) % 100 ) << 1 ;
580
- let d7 = ( ( to_parse / 1e2 as u64 ) % 100 ) << 1 ;
581
- let d8 = ( ( to_parse / 1e0 as u64 ) % 100 ) << 1 ;
582
-
583
- * curr -= 16 ;
584
-
585
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
586
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
587
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d3 as usize ) , buf_ptr. add ( * curr + 4 ) , 2 ) ;
588
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d4 as usize ) , buf_ptr. add ( * curr + 6 ) , 2 ) ;
589
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d5 as usize ) , buf_ptr. add ( * curr + 8 ) , 2 ) ;
590
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d6 as usize ) , buf_ptr. add ( * curr + 10 ) , 2 ) ;
591
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d7 as usize ) , buf_ptr. add ( * curr + 12 ) , 2 ) ;
592
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d8 as usize ) , buf_ptr. add ( * curr + 14 ) , 2 ) ;
593
- }
594
- if n >= 1e8 as u64 {
595
- let to_parse = n % 1e8 as u64 ;
596
- n /= 1e8 as u64 ;
597
-
598
- // Some of these are nops but it looks more elegant this way.
599
- let d1 = ( ( to_parse / 1e6 as u64 ) % 100 ) << 1 ;
600
- let d2 = ( ( to_parse / 1e4 as u64 ) % 100 ) << 1 ;
601
- let d3 = ( ( to_parse / 1e2 as u64 ) % 100 ) << 1 ;
602
- let d4 = ( ( to_parse / 1e0 as u64 ) % 100 ) << 1 ;
603
- * curr -= 8 ;
604
-
605
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
606
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
607
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d3 as usize ) , buf_ptr. add ( * curr + 4 ) , 2 ) ;
608
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d4 as usize ) , buf_ptr. add ( * curr + 6 ) , 2 ) ;
609
- }
610
- // `n` < 1e8 < (1 << 32)
611
- let mut n = n as u32 ;
612
- if n >= 1e4 as u32 {
613
- let to_parse = n % 1e4 as u32 ;
614
- n /= 1e4 as u32 ;
615
-
616
- let d1 = ( to_parse / 100 ) << 1 ;
617
- let d2 = ( to_parse % 100 ) << 1 ;
618
- * curr -= 4 ;
619
-
620
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr + 0 ) , 2 ) ;
621
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d2 as usize ) , buf_ptr. add ( * curr + 2 ) , 2 ) ;
622
- }
623
-
624
- // `n` < 1e4 < (1 << 16)
625
- let mut n = n as u16 ;
626
- if n >= 100 {
627
- let d1 = ( n % 100 ) << 1 ;
628
- n /= 100 ;
629
- * curr -= 2 ;
630
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr) , 2 ) ;
631
- }
632
-
633
- // decode last 1 or 2 chars
634
- if n < 10 {
635
- * curr -= 1 ;
636
- * buf_ptr. add ( * curr) = ( n as u8 ) + b'0' ;
637
- } else {
638
- let d1 = n << 1 ;
639
- * curr -= 2 ;
640
- ptr:: copy_nonoverlapping ( lut_ptr. add ( d1 as usize ) , buf_ptr. add ( * curr) , 2 ) ;
641
- }
642
- }
643
- }
644
-
645
558
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
646
559
impl fmt:: Display for u128 {
647
560
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
@@ -652,96 +565,160 @@ impl fmt::Display for u128 {
652
565
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
653
566
impl fmt:: Display for i128 {
654
567
fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
655
- let is_nonnegative = * self >= 0 ;
656
- let n = if is_nonnegative {
657
- self . to_u128 ( )
658
- } else {
659
- // convert the negative num to positive by summing 1 to its 2s complement
660
- ( !self . to_u128 ( ) ) . wrapping_add ( 1 )
661
- } ;
662
- fmt_u128 ( n, is_nonnegative, f)
568
+ fmt_u128 ( self . unsigned_abs ( ) , * self >= 0 , f)
663
569
}
664
570
}
665
571
666
- /// Specialized optimization for u128. Instead of taking two items at a time, it splits
667
- /// into at most 2 u64s, and then chunks by 10e16, 10e8, 10e4, 10e2, and then 10e1.
668
- /// It also has to handle 1 last item, as 10^40 > 2^128 > 10^39, whereas
669
- /// 10^20 > 2^64 > 10^19.
572
+ /// Format optimized for u128. Computation of 128 bits is limited by processing
573
+ /// in batches of 16 decimals at a time.
670
574
fn fmt_u128 ( n : u128 , is_nonnegative : bool , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
671
- // 2^128 is about 3*10^38, so 39 gives an extra byte of space
672
- let mut buf = [ MaybeUninit :: < u8 > :: uninit ( ) ; 39 ] ;
673
- let mut curr = buf. len ( ) ;
674
-
675
- let ( n, rem) = udiv_1e19 ( n) ;
676
- parse_u64_into ( rem, & mut buf, & mut curr) ;
677
-
678
- if n != 0 {
679
- // 0 pad up to point
680
- let target = buf. len ( ) - 19 ;
681
- // SAFETY: Guaranteed that we wrote at most 19 bytes, and there must be space
682
- // remaining since it has length 39
683
- unsafe {
684
- ptr:: write_bytes (
685
- MaybeUninit :: slice_as_mut_ptr ( & mut buf) . add ( target) ,
686
- b'0' ,
687
- curr - target,
688
- ) ;
689
- }
690
- curr = target;
691
-
692
- let ( n, rem) = udiv_1e19 ( n) ;
693
- parse_u64_into ( rem, & mut buf, & mut curr) ;
694
- // Should this following branch be annotated with unlikely?
695
- if n != 0 {
696
- let target = buf. len ( ) - 38 ;
697
- // The raw `buf_ptr` pointer is only valid until `buf` is used the next time,
698
- // buf `buf` is not used in this scope so we are good.
699
- let buf_ptr = MaybeUninit :: slice_as_mut_ptr ( & mut buf) ;
700
- // SAFETY: At this point we wrote at most 38 bytes, pad up to that point,
701
- // There can only be at most 1 digit remaining.
702
- unsafe {
703
- ptr:: write_bytes ( buf_ptr. add ( target) , b'0' , curr - target) ;
704
- curr = target - 1 ;
705
- * buf_ptr. add ( curr) = ( n as u8 ) + b'0' ;
706
- }
575
+ // Optimize common-case zero, which would also need special treatment due to
576
+ // its "leading" zero.
577
+ if n == 0 {
578
+ return f. pad_integral ( true , "" , "0" ) ;
579
+ }
580
+
581
+ // u128::MAX has 39 significant decimals.
582
+ const MAX_DEC_N : usize = 39 ;
583
+ // Buffer decimals with right alignment.
584
+ let mut buf = [ MaybeUninit :: < u8 > :: uninit ( ) ; MAX_DEC_N ] ;
585
+ // Count the number of bytes in buf that are not initialized.
586
+ let mut offset = buf. len ( ) ;
587
+
588
+ // Take the 16 least-significant decimals.
589
+ let ( n, mod_1e16) = div_rem_1e16 ( n) ;
590
+ let mut remain = if n == 0 {
591
+ mod_1e16
592
+ } else {
593
+ // write buf[23..39]
594
+ enc_16lsd :: < 23 > ( & mut buf, mod_1e16) ;
595
+ offset = 23 ;
596
+
597
+ // Take another 16 decimals.
598
+ let ( n, mod_1e16) = div_rem_1e16 ( n) ;
599
+ if n == 0 {
600
+ mod_1e16
601
+ } else {
602
+ // write buf[7..23]
603
+ enc_16lsd :: < 7 > ( & mut buf, mod_1e16) ;
604
+ offset = 7 ;
605
+
606
+ debug_assert ! ( n < 10 ) ;
607
+ n as u64
707
608
}
609
+ } ;
610
+
611
+ // Format per four digits from the lookup table.
612
+ while remain > 999 {
613
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
614
+ // and the while condition ensures at least 4 more decimals.
615
+ unsafe { core:: hint:: assert_unchecked ( offset >= 4 ) }
616
+ // SAFETY: The offset counts down from its initial buf.len()
617
+ // without underflow due to the previous precondition.
618
+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
619
+ offset -= 4 ;
620
+
621
+ // pull two pairs
622
+ let quad = remain % 1_00_00 ;
623
+ remain /= 1_00_00 ;
624
+ let pair1 = ( quad / 100 ) as usize ;
625
+ let pair2 = ( quad % 100 ) as usize ;
626
+ buf[ offset + 0 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
627
+ buf[ offset + 1 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
628
+ buf[ offset + 2 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
629
+ buf[ offset + 3 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
630
+ }
631
+
632
+ // Format per two digits from the lookup table.
633
+ if remain > 9 {
634
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
635
+ // and the if condition ensures at least 2 more decimals.
636
+ unsafe { core:: hint:: assert_unchecked ( offset >= 2 ) }
637
+ // SAFETY: The offset counts down from its initial buf.len()
638
+ // without underflow due to the previous precondition.
639
+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
640
+ offset -= 2 ;
641
+
642
+ let pair = ( remain % 100 ) as usize ;
643
+ remain /= 100 ;
644
+ buf[ offset + 0 ] . write ( DEC_DIGITS_LUT [ pair * 2 + 0 ] ) ;
645
+ buf[ offset + 1 ] . write ( DEC_DIGITS_LUT [ pair * 2 + 1 ] ) ;
646
+ }
647
+
648
+ // Format the last remaining digit, if any.
649
+ if remain != 0 {
650
+ // SAFETY: All of the decimals fit in buf due to MAX_DEC_N
651
+ // and the if condition ensures (at least) 1 more decimal.
652
+ unsafe { core:: hint:: assert_unchecked ( offset >= 1 ) }
653
+ // SAFETY: The offset counts down from its initial buf.len()
654
+ // without underflow due to the previous precondition.
655
+ unsafe { core:: hint:: assert_unchecked ( offset <= buf. len ( ) ) }
656
+ offset -= 1 ;
657
+
658
+ // Either the compiler sees that remain < 10, or it prevents
659
+ // a boundary check up next.
660
+ let last = ( remain & 15 ) as usize ;
661
+ buf[ offset] . write ( DEC_DIGITS_LUT [ last * 2 + 1 ] ) ;
662
+ // not used: remain = 0;
708
663
}
709
664
710
- // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
711
- // UTF-8 since `DEC_DIGITS_LUT` is
712
- let buf_slice = unsafe {
665
+ // SAFETY: All buf content since offset is set.
666
+ let written = unsafe { buf. get_unchecked ( offset..) } ;
667
+ // SAFETY: Writes use ASCII from the lookup table exclusively.
668
+ let as_str = unsafe {
713
669
str:: from_utf8_unchecked ( slice:: from_raw_parts (
714
- MaybeUninit :: slice_as_mut_ptr ( & mut buf ) . add ( curr ) ,
715
- buf . len ( ) - curr ,
670
+ MaybeUninit :: slice_as_ptr ( written ) ,
671
+ written . len ( ) ,
716
672
) )
717
673
} ;
718
- f. pad_integral ( is_nonnegative, "" , buf_slice )
674
+ f. pad_integral ( is_nonnegative, "" , as_str )
719
675
}
720
676
721
- /// Partition of `n` into n > 1e19 and rem <= 1e19
677
+ /// Encodes the 16 least-significant decimals of `n` into `buf[OFFSET..OFFSET + 16]`.
678
+ fn enc_16lsd < const OFFSET : usize > ( buf : & mut [ MaybeUninit < u8 > ; 39 ] , n : u64 ) {
679
+ // Consume the least-significant decimals from a working copy.
680
+ let mut remain = n;
681
+
682
+ // Format per four digits from the lookup table.
683
+ for quad_index in ( 0 ..4 ) . rev ( ) {
684
+ // pull two pairs
685
+ let quad = remain % 1_00_00 ;
686
+ remain /= 1_00_00 ;
687
+ let pair1 = ( quad / 100 ) as usize ;
688
+ let pair2 = ( quad % 100 ) as usize ;
689
+ buf[ quad_index * 4 + OFFSET + 0 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 0 ] ) ;
690
+ buf[ quad_index * 4 + OFFSET + 1 ] . write ( DEC_DIGITS_LUT [ pair1 * 2 + 1 ] ) ;
691
+ buf[ quad_index * 4 + OFFSET + 2 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 0 ] ) ;
692
+ buf[ quad_index * 4 + OFFSET + 3 ] . write ( DEC_DIGITS_LUT [ pair2 * 2 + 1 ] ) ;
693
+ }
694
+ }
695
+
696
+ /// Euclidean division by the constant 1E16: returns the quotient and the remainder,
697
+ /// which basically consumes the 16 least-significant decimals from n.
722
698
///
723
- /// Integer division algorithm is based on the following paper:
699
+ /// The integer division algorithm is based on the following paper:
724
700
///
725
701
/// T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication”
726
702
/// in Proc. of the SIGPLAN94 Conference on Programming Language Design and
727
703
/// Implementation, 1994, pp. 61–72
728
704
///
729
- fn udiv_1e19 ( n : u128 ) -> ( u128 , u64 ) {
730
- const DIV : u64 = 1e19 as u64 ;
731
- const FACTOR : u128 = 156927543384667019095894735580191660403 ;
705
+ #[ inline]
706
+ fn div_rem_1e16 ( n : u128 ) -> ( u128 , u64 ) {
707
+ const D : u128 = 1_0000_0000_0000_0000 ;
708
+ if n < D {
709
+ return ( 0 , n as u64 ) ;
710
+ }
732
711
733
- let quot = if n < 1 << 83 {
734
- ( ( n >> 19 ) as u64 / ( DIV >> 19 ) ) as u128
735
- } else {
736
- u128_mulhi ( n, FACTOR ) >> 62
737
- } ;
712
+ // These constant values are computed with the CHOOSE_MULTIPLIER procedure.
713
+ const M_HIGH : u128 = 76624777043294442917917351357515459181 ;
714
+ const SH_POST : u8 = 51 ;
738
715
739
- let rem = ( n - quot * DIV as u128 ) as u64 ;
740
- ( quot, rem)
716
+ let quot = u128_mulhi ( n, M_HIGH ) >> SH_POST ;
717
+ let rem = n - quot * D ;
718
+ ( quot, rem as u64 )
741
719
}
742
720
743
721
/// Multiply unsigned 128 bit integers, return upper 128 bits of the result
744
- #[ inline]
745
722
fn u128_mulhi ( x : u128 , y : u128 ) -> u128 {
746
723
let x_lo = x as u64 ;
747
724
let x_hi = ( x >> 64 ) as u64 ;
0 commit comments