Skip to content

Commit d9bba11

Browse files
authored
Rollup merge of #101401 - mx00s:expand-const, r=fee1-dead
Make `char::is_lowercase` and `char::is_uppercase` const. Implements #101400.
2 parents 43a7438 + 2b328ea commit d9bba11

File tree

5 files changed

+63
-27
lines changed

library/core/src/char/methods.rs

+20-2
Original file line numberDiff line numberDiff line change
@@ -746,10 +746,19 @@ impl char {
746746
/// assert!(!'中'.is_lowercase());
747747
/// assert!(!' '.is_lowercase());
748748
/// ```
749+
///
750+
/// In a const context:
751+
///
752+
/// ```
753+
/// #![feature(const_unicode_case_lookup)]
754+
/// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
755+
/// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
756+
/// ```
749757
#[must_use]
750758
#[stable(feature = "rust1", since = "1.0.0")]
759+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
751760
#[inline]
752-
pub fn is_lowercase(self) -> bool {
761+
pub const fn is_lowercase(self) -> bool {
753762
match self {
754763
'a'..='z' => true,
755764
c => c > '\x7f' && unicode::Lowercase(c),
@@ -779,10 +788,19 @@ impl char {
779788
/// assert!(!'中'.is_uppercase());
780789
/// assert!(!' '.is_uppercase());
781790
/// ```
791+
///
792+
/// In a const context:
793+
///
794+
/// ```
795+
/// #![feature(const_unicode_case_lookup)]
796+
/// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
797+
/// assert!(CAPITAL_DELTA_IS_UPPERCASE);
798+
/// ```
782799
#[must_use]
783800
#[stable(feature = "rust1", since = "1.0.0")]
801+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
784802
#[inline]
785-
pub fn is_uppercase(self) -> bool {
803+
pub const fn is_uppercase(self) -> bool {
786804
match self {
787805
'A'..='Z' => true,
788806
c => c > '\x7f' && unicode::Uppercase(c),

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
#![feature(const_type_id)]
144144
#![feature(const_type_name)]
145145
#![feature(const_default_impls)]
146+
#![feature(const_unicode_case_lookup)]
146147
#![feature(const_unsafecell_get_mut)]
147148
#![feature(core_panic)]
148149
#![feature(duration_consts_float)]

library/core/src/unicode/unicode_data.rs

+22-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
///! This file is generated by src/tools/unicode-table-generator; do not edit manually!
22
3+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
34
#[inline(always)]
4-
fn bitset_search<
5+
const fn bitset_search<
56
const N: usize,
67
const CHUNK_SIZE: usize,
78
const N1: usize,
@@ -17,14 +18,18 @@ fn bitset_search<
1718
let bucket_idx = (needle / 64) as usize;
1819
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
1920
let chunk_piece = bucket_idx % CHUNK_SIZE;
20-
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
21-
v
21+
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
22+
// feature stabilizes.
23+
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
24+
chunk_idx_map[chunk_map_idx]
2225
} else {
2326
return false;
2427
};
2528
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
26-
let word = if let Some(word) = bitset_canonical.get(idx) {
27-
*word
29+
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
30+
// feature stabilizes.
31+
let word = if idx < bitset_canonical.len() {
32+
bitset_canonical[idx]
2833
} else {
2934
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
3035
let mut word = bitset_canonical[real_idx as usize];
@@ -318,14 +323,14 @@ pub mod grapheme_extend {
318323

319324
#[rustfmt::skip]
320325
pub mod lowercase {
321-
static BITSET_CHUNKS_MAP: [u8; 123] = [
326+
const BITSET_CHUNKS_MAP: &'static [u8; 123] = &[
322327
14, 17, 0, 0, 9, 0, 0, 12, 13, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323328
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324329
0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
325330
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0,
326331
3, 0, 0, 7,
327332
];
328-
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
333+
const BITSET_INDEX_CHUNKS: &'static [[u8; 16]; 19] = &[
329334
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
330335
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0],
331336
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 55, 0],
@@ -346,7 +351,7 @@ pub mod lowercase {
346351
[16, 49, 2, 20, 66, 9, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0],
347352
[63, 39, 54, 12, 73, 61, 18, 1, 6, 62, 71, 19, 68, 69, 3, 44],
348353
];
349-
static BITSET_CANONICAL: [u64; 55] = [
354+
const BITSET_CANONICAL: &'static [u64; 55] = &[
350355
0b0000000000000000000000000000000000000000000000000000000000000000,
351356
0b1111111111111111110000000000000000000000000011111111111111111111,
352357
0b1010101010101010101010101010101010101010101010101010100000000010,
@@ -403,13 +408,14 @@ pub mod lowercase {
403408
0b1110011111111111111111111111111111111111111111110000000000000000,
404409
0b1110101111000000000000000000000000001111111111111111111111111100,
405410
];
406-
static BITSET_MAPPING: [(u8, u8); 20] = [
411+
const BITSET_MAPPING: &'static [(u8, u8); 20] = &[
407412
(0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70),
408413
(1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187),
409414
(6, 132), (7, 93),
410415
];
411416

412-
pub fn lookup(c: char) -> bool {
417+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
418+
pub const fn lookup(c: char) -> bool {
413419
super::bitset_search(
414420
c as u32,
415421
&BITSET_CHUNKS_MAP,
@@ -454,14 +460,14 @@ pub mod n {
454460

455461
#[rustfmt::skip]
456462
pub mod uppercase {
457-
static BITSET_CHUNKS_MAP: [u8; 125] = [
463+
const BITSET_CHUNKS_MAP: &'static [u8; 125] = &[
458464
12, 15, 6, 6, 0, 6, 6, 2, 4, 11, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
459465
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
460466
6, 6, 6, 5, 6, 14, 6, 10, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
461467
6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6,
462468
6, 6, 9, 6, 3,
463469
];
464-
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
470+
const BITSET_INDEX_CHUNKS: &'static [[u8; 16]; 17] = &[
465471
[43, 43, 5, 34, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 5, 1],
466472
[43, 43, 5, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
467473
[43, 43, 39, 43, 43, 43, 43, 43, 17, 17, 62, 17, 42, 29, 24, 23],
@@ -480,7 +486,7 @@ pub mod uppercase {
480486
[57, 19, 2, 18, 10, 47, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
481487
[57, 37, 17, 27, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
482488
];
483-
static BITSET_CANONICAL: [u64; 43] = [
489+
const BITSET_CANONICAL: &'static [u64; 43] = &[
484490
0b0000011111111111111111111111111000000000000000000000000000000000,
485491
0b0000000000111111111111111111111111111111111111111111111111111111,
486492
0b0101010101010101010101010101010101010101010101010101010000000001,
@@ -525,13 +531,14 @@ pub mod uppercase {
525531
0b1111011111111111000000000000000000000000000000000000000000000000,
526532
0b1111111100000000111111110000000000111111000000001111111100000000,
527533
];
528-
static BITSET_MAPPING: [(u8, u8); 25] = [
534+
const BITSET_MAPPING: &'static [(u8, u8); 25] = &[
529535
(0, 187), (0, 177), (0, 171), (0, 167), (0, 164), (0, 32), (0, 47), (0, 51), (0, 121),
530536
(0, 117), (0, 109), (1, 150), (1, 148), (1, 142), (1, 134), (1, 131), (1, 64), (2, 164),
531537
(2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
532538
];
533539

534-
pub fn lookup(c: char) -> bool {
540+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
541+
pub const fn lookup(c: char) -> bool {
535542
super::bitset_search(
536543
c as u32,
537544
&BITSET_CHUNKS_MAP,

src/tools/unicode-table-generator/src/range_search.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
12
#[inline(always)]
2-
fn bitset_search<
3+
const fn bitset_search<
34
const N: usize,
45
const CHUNK_SIZE: usize,
56
const N1: usize,
@@ -15,14 +16,18 @@ fn bitset_search<
1516
let bucket_idx = (needle / 64) as usize;
1617
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
1718
let chunk_piece = bucket_idx % CHUNK_SIZE;
18-
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
19-
v
19+
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
20+
// feature stabilizes.
21+
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
22+
chunk_idx_map[chunk_map_idx]
2023
} else {
2124
return false;
2225
};
2326
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
24-
let word = if let Some(word) = bitset_canonical.get(idx) {
25-
*word
27+
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
28+
// feature stabilizes.
29+
let word = if idx < bitset_canonical.len() {
30+
bitset_canonical[idx]
2631
} else {
2732
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
2833
let mut word = bitset_canonical[real_idx as usize];

src/tools/unicode-table-generator/src/raw_emitter.rs

+10-5
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,15 @@ impl RawEmitter {
7676

7777
writeln!(
7878
&mut self.file,
79-
"static BITSET_CANONICAL: [u64; {}] = [{}];",
79+
"const BITSET_CANONICAL: &'static [u64; {}] = &[{}];",
8080
canonicalized.canonical_words.len(),
8181
fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))),
8282
)
8383
.unwrap();
8484
self.bytes_used += 8 * canonicalized.canonical_words.len();
8585
writeln!(
8686
&mut self.file,
87-
"static BITSET_MAPPING: [(u8, u8); {}] = [{}];",
87+
"const BITSET_MAPPING: &'static [(u8, u8); {}] = &[{}];",
8888
canonicalized.canonicalized_words.len(),
8989
fmt_list(&canonicalized.canonicalized_words),
9090
)
@@ -96,7 +96,12 @@ impl RawEmitter {
9696

9797
self.blank_line();
9898

99-
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
99+
writeln!(
100+
&mut self.file,
101+
r#"#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]"#
102+
)
103+
.unwrap();
104+
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
100105
writeln!(&mut self.file, " super::bitset_search(",).unwrap();
101106
writeln!(&mut self.file, " c as u32,").unwrap();
102107
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
@@ -130,15 +135,15 @@ impl RawEmitter {
130135

131136
writeln!(
132137
&mut self.file,
133-
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
138+
"const BITSET_CHUNKS_MAP: &'static [u8; {}] = &[{}];",
134139
chunk_indices.len(),
135140
fmt_list(&chunk_indices),
136141
)
137142
.unwrap();
138143
self.bytes_used += chunk_indices.len();
139144
writeln!(
140145
&mut self.file,
141-
"static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];",
146+
"const BITSET_INDEX_CHUNKS: &'static [[u8; {}]; {}] = &[{}];",
142147
chunk_length,
143148
chunks.len(),
144149
fmt_list(chunks.iter()),

0 commit comments

Comments
 (0)