@@ -18,38 +18,106 @@ const fn backslash<const N: usize>(a: ascii::Char) -> ([ascii::Char; N], Range<u
18
18
( output, 0 ..2 )
19
19
}
20
20
21
+ #[ inline]
22
+ const fn hex_escape < const N : usize > ( byte : u8 ) -> ( [ ascii:: Char ; N ] , Range < u8 > ) {
23
+ const { assert ! ( N >= 4 ) } ;
24
+
25
+ let mut output = [ ascii:: Char :: Null ; N ] ;
26
+
27
+ let hi = HEX_DIGITS [ ( byte >> 4 ) as usize ] ;
28
+ let lo = HEX_DIGITS [ ( byte & 0xf ) as usize ] ;
29
+
30
+ output[ 0 ] = ascii:: Char :: ReverseSolidus ;
31
+ output[ 1 ] = ascii:: Char :: SmallX ;
32
+ output[ 2 ] = hi;
33
+ output[ 3 ] = lo;
34
+
35
+ ( output, 0 ..4 )
36
+ }
37
+
38
+ #[ inline]
39
+ const fn verbatim < const N : usize > ( a : ascii:: Char ) -> ( [ ascii:: Char ; N ] , Range < u8 > ) {
40
+ const { assert ! ( N >= 1 ) } ;
41
+
42
+ let mut output = [ ascii:: Char :: Null ; N ] ;
43
+
44
+ output[ 0 ] = a;
45
+
46
+ ( output, 0 ..1 )
47
+ }
48
+
21
49
/// Escapes an ASCII character.
22
50
///
23
51
/// Returns a buffer and the length of the escaped representation.
24
52
const fn escape_ascii < const N : usize > ( byte : u8 ) -> ( [ ascii:: Char ; N ] , Range < u8 > ) {
25
53
const { assert ! ( N >= 4 ) } ;
26
54
27
- match byte {
28
- b'\t' => backslash ( ascii:: Char :: SmallT ) ,
29
- b'\r' => backslash ( ascii:: Char :: SmallR ) ,
30
- b'\n' => backslash ( ascii:: Char :: SmallN ) ,
31
- b'\\' => backslash ( ascii:: Char :: ReverseSolidus ) ,
32
- b'\'' => backslash ( ascii:: Char :: Apostrophe ) ,
33
- b'\"' => backslash ( ascii:: Char :: QuotationMark ) ,
34
- byte => {
35
- let mut output = [ ascii:: Char :: Null ; N ] ;
36
-
37
- if let Some ( c) = byte. as_ascii ( )
38
- && !byte. is_ascii_control ( )
39
- {
40
- output[ 0 ] = c;
41
- ( output, 0 ..1 )
42
- } else {
43
- let hi = HEX_DIGITS [ ( byte >> 4 ) as usize ] ;
44
- let lo = HEX_DIGITS [ ( byte & 0xf ) as usize ] ;
55
+ #[ cfg( feature = "optimize_for_size" ) ]
56
+ {
57
+ match byte {
58
+ b'\t' => backslash ( ascii:: Char :: SmallT ) ,
59
+ b'\r' => backslash ( ascii:: Char :: SmallR ) ,
60
+ b'\n' => backslash ( ascii:: Char :: SmallN ) ,
61
+ b'\\' => backslash ( ascii:: Char :: ReverseSolidus ) ,
62
+ b'\'' => backslash ( ascii:: Char :: Apostrophe ) ,
63
+ b'"' => backslash ( ascii:: Char :: QuotationMark ) ,
64
+ 0x00 ..=0x1F | 0x7F => hex_escape ( byte) ,
65
+ _ => match ascii:: Char :: from_u8 ( byte) {
66
+ Some ( a) => verbatim ( a) ,
67
+ None => hex_escape ( byte) ,
68
+ } ,
69
+ }
70
+ }
71
+
72
+ #[ cfg( not( feature = "optimize_for_size" ) ) ]
73
+ {
74
+ /// Lookup table helps us determine how to display character.
75
+ ///
76
+ /// Since ASCII characters will always be 7 bits, we can exploit this to store the 8th bit to
77
+ /// indicate whether the result is escaped or unescaped.
78
+ ///
79
+ /// We additionally use 0x80 (escaped NUL character) to indicate hex-escaped bytes, since
80
+ /// escaped NUL will not occur.
81
+ const LOOKUP : [ u8 ; 256 ] = {
82
+ let mut arr = [ 0 ; 256 ] ;
83
+ let mut idx = 0 ;
84
+ while idx <= 255 {
85
+ arr[ idx] = match idx as u8 {
86
+ // use 8th bit to indicate escaped
87
+ b'\t' => 0x80 | b't' ,
88
+ b'\r' => 0x80 | b'r' ,
89
+ b'\n' => 0x80 | b'n' ,
90
+ b'\\' => 0x80 | b'\\' ,
91
+ b'\'' => 0x80 | b'\'' ,
92
+ b'"' => 0x80 | b'"' ,
93
+
94
+ // use NUL to indicate hex-escaped
95
+ 0x00 ..=0x1F | 0x7F ..=0xFF => 0x80 | b'\0' ,
96
+
97
+ idx => idx,
98
+ } ;
99
+ idx += 1 ;
100
+ }
101
+ arr
102
+ } ;
45
103
46
- output[ 0 ] = ascii:: Char :: ReverseSolidus ;
47
- output[ 1 ] = ascii:: Char :: SmallX ;
48
- output[ 2 ] = hi;
49
- output[ 3 ] = lo;
104
+ let lookup = LOOKUP [ byte as usize ] ;
50
105
51
- ( output, 0 ..4 )
106
+ // 8th bit indicates escape
107
+ let lookup_escaped = lookup & 0x80 != 0 ;
108
+
109
+ // SAFETY: We explicitly mask out the eighth bit to get a 7-bit ASCII character.
110
+ let lookup_ascii = unsafe { ascii:: Char :: from_u8_unchecked ( lookup & 0x7F ) } ;
111
+
112
+ if lookup_escaped {
113
+ // NUL indicates hex-escaped
114
+ if matches ! ( lookup_ascii, ascii:: Char :: Null ) {
115
+ hex_escape ( byte)
116
+ } else {
117
+ backslash ( lookup_ascii)
52
118
}
119
+ } else {
120
+ verbatim ( lookup_ascii)
53
121
}
54
122
}
55
123
}
0 commit comments