@@ -638,8 +638,7 @@ impl char {
638
638
#[ rustc_const_stable( feature = "const_char_len_utf" , since = "1.52.0" ) ]
639
639
#[ inline]
640
640
pub const fn len_utf16 ( self ) -> usize {
641
- let ch = self as u32 ;
642
- if ( ch & 0xFFFF ) == ch { 1 } else { 2 }
641
+ len_utf16 ( self as u32 )
643
642
}
644
643
645
644
/// Encodes this character as UTF-8 into the provided byte buffer,
@@ -709,8 +708,9 @@ impl char {
709
708
/// '𝕊'.encode_utf16(&mut b);
710
709
/// ```
711
710
#[ stable( feature = "unicode_encode_char" , since = "1.15.0" ) ]
711
+ #[ rustc_const_unstable( feature = "const_char_encode_utf16" , issue = "130660" ) ]
712
712
#[ inline]
713
- pub fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
713
+ pub const fn encode_utf16 ( self , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
714
714
encode_utf16_raw ( self as u32 , dst)
715
715
}
716
716
@@ -1747,7 +1747,12 @@ const fn len_utf8(code: u32) -> usize {
1747
1747
}
1748
1748
}
1749
1749
1750
- /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1750
+ #[ inline]
1751
+ const fn len_utf16 ( code : u32 ) -> usize {
1752
+ if ( code & 0xFFFF ) == code { 1 } else { 2 }
1753
+ }
1754
+
1755
+ /// Encodes a raw `u32` value as UTF-8 into the provided byte buffer,
1751
1756
/// and then returns the subslice of the buffer that contains the encoded character.
1752
1757
///
1753
1758
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
@@ -1801,7 +1806,7 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1801
1806
unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
1802
1807
}
1803
1808
1804
- /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1809
+ /// Encodes a raw `u32` value as UTF-16 into the provided `u16` buffer,
1805
1810
/// and then returns the subslice of the buffer that contains the encoded character.
1806
1811
///
1807
1812
/// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
@@ -1812,28 +1817,33 @@ pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1812
1817
/// Panics if the buffer is not large enough.
1813
1818
/// A buffer of length 2 is large enough to encode any `char`.
1814
1819
#[ unstable( feature = "char_internals" , reason = "exposed only for libstd" , issue = "none" ) ]
1820
+ #[ rustc_const_unstable( feature = "const_char_encode_utf16" , issue = "130660" ) ]
1815
1821
#[ doc( hidden) ]
1816
1822
#[ inline]
1817
- pub fn encode_utf16_raw ( mut code : u32 , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
1818
- // SAFETY: each arm checks whether there are enough bits to write into
1819
- unsafe {
1820
- if ( code & 0xFFFF ) == code && !dst. is_empty ( ) {
1821
- // The BMP falls through
1822
- * dst. get_unchecked_mut ( 0 ) = code as u16 ;
1823
- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 1 )
1824
- } else if dst. len ( ) >= 2 {
1825
- // Supplementary planes break into surrogates.
1823
+ pub const fn encode_utf16_raw ( mut code : u32 , dst : & mut [ u16 ] ) -> & mut [ u16 ] {
1824
+ const fn panic_at_const ( _code : u32 , _len : usize , _dst_len : usize ) {
1825
+ // Note that we cannot format in constant expressions.
1826
+ panic ! ( "encode_utf16: buffer does not have enough bytes to encode code point" ) ;
1827
+ }
1828
+ fn panic_at_rt ( code : u32 , len : usize , dst_len : usize ) {
1829
+ panic ! (
1830
+ "encode_utf16: need {len} bytes to encode U+{code:04X} but buffer has just {dst_len}" ,
1831
+ ) ;
1832
+ }
1833
+ let len = len_utf16 ( code) ;
1834
+ match ( len, & mut * dst) {
1835
+ ( 1 , [ a, ..] ) => {
1836
+ * a = code as u16 ;
1837
+ }
1838
+ ( 2 , [ a, b, ..] ) => {
1826
1839
code -= 0x1_0000 ;
1827
- * dst. get_unchecked_mut ( 0 ) = 0xD800 | ( ( code >> 10 ) as u16 ) ;
1828
- * dst. get_unchecked_mut ( 1 ) = 0xDC00 | ( ( code as u16 ) & 0x3FF ) ;
1829
- slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , 2 )
1830
- } else {
1831
- panic ! (
1832
- "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}" ,
1833
- char :: from_u32_unchecked( code) . len_utf16( ) ,
1834
- code,
1835
- dst. len( ) ,
1836
- )
1840
+
1841
+ * a = ( code >> 10 ) as u16 | 0xD800 ;
1842
+ * b = ( code & 0x3FF ) as u16 | 0xDC00 ;
1837
1843
}
1838
- }
1844
+ // FIXME(const-hack): We would prefer to have streamlined panics when formatters become const-friendly.
1845
+ _ => const_eval_select ( ( code, len, dst. len ( ) ) , panic_at_const, panic_at_rt) ,
1846
+ } ;
1847
+ // SAFETY: `<&mut [u16]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
1848
+ unsafe { slice:: from_raw_parts_mut ( dst. as_mut_ptr ( ) , len) }
1839
1849
}
0 commit comments