Skip to content

Commit

Permalink
Rollup merge of rust-lang#101401 - mx00s:expand-const, r=fee1-dead
Browse files: browse the repository at this point in the history
Make `char::is_lowercase` and `char::is_uppercase` const

Implements rust-lang#101400.
  • Loading branch information
matthiaskrgr authored Sep 4, 2022
2 parents 43a7438 + 2b328ea commit d9bba11
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 27 deletions.
22 changes: 20 additions & 2 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -746,10 +746,19 @@ impl char {
/// assert!(!'中'.is_lowercase());
/// assert!(!' '.is_lowercase());
/// ```
///
/// In a const context:
///
/// ```
/// #![feature(const_unicode_case_lookup)]
/// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
/// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
/// ```
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline]
pub fn is_lowercase(self) -> bool {
pub const fn is_lowercase(self) -> bool {
match self {
'a'..='z' => true,
c => c > '\x7f' && unicode::Lowercase(c),
Expand Down Expand Up @@ -779,10 +788,19 @@ impl char {
/// assert!(!'中'.is_uppercase());
/// assert!(!' '.is_uppercase());
/// ```
///
/// In a const context:
///
/// ```
/// #![feature(const_unicode_case_lookup)]
/// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
/// assert!(CAPITAL_DELTA_IS_UPPERCASE);
/// ```
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline]
pub fn is_uppercase(self) -> bool {
pub const fn is_uppercase(self) -> bool {
match self {
'A'..='Z' => true,
c => c > '\x7f' && unicode::Uppercase(c),
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
#![feature(const_type_id)]
#![feature(const_type_name)]
#![feature(const_default_impls)]
#![feature(const_unicode_case_lookup)]
#![feature(const_unsafecell_get_mut)]
#![feature(core_panic)]
#![feature(duration_consts_float)]
Expand Down
37 changes: 22 additions & 15 deletions library/core/src/unicode/unicode_data.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
//! This file is generated by src/tools/unicode-table-generator; do not edit manually!
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline(always)]
fn bitset_search<
const fn bitset_search<
const N: usize,
const CHUNK_SIZE: usize,
const N1: usize,
Expand All @@ -17,14 +18,18 @@ fn bitset_search<
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
v
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
chunk_idx_map[chunk_map_idx]
} else {
return false;
};
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
let word = if let Some(word) = bitset_canonical.get(idx) {
*word
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let word = if idx < bitset_canonical.len() {
bitset_canonical[idx]
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
let mut word = bitset_canonical[real_idx as usize];
Expand Down Expand Up @@ -318,14 +323,14 @@ pub mod grapheme_extend {

#[rustfmt::skip]
pub mod lowercase {
static BITSET_CHUNKS_MAP: [u8; 123] = [
const BITSET_CHUNKS_MAP: &'static [u8; 123] = &[
14, 17, 0, 0, 9, 0, 0, 12, 13, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0,
3, 0, 0, 7,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
const BITSET_INDEX_CHUNKS: &'static [[u8; 16]; 19] = &[
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 55, 0],
Expand All @@ -346,7 +351,7 @@ pub mod lowercase {
[16, 49, 2, 20, 66, 9, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[63, 39, 54, 12, 73, 61, 18, 1, 6, 62, 71, 19, 68, 69, 3, 44],
];
static BITSET_CANONICAL: [u64; 55] = [
const BITSET_CANONICAL: &'static [u64; 55] = &[
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1111111111111111110000000000000000000000000011111111111111111111,
0b1010101010101010101010101010101010101010101010101010100000000010,
Expand Down Expand Up @@ -403,13 +408,14 @@ pub mod lowercase {
0b1110011111111111111111111111111111111111111111110000000000000000,
0b1110101111000000000000000000000000001111111111111111111111111100,
];
static BITSET_MAPPING: [(u8, u8); 20] = [
const BITSET_MAPPING: &'static [(u8, u8); 20] = &[
(0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70),
(1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187),
(6, 132), (7, 93),
];

pub fn lookup(c: char) -> bool {
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
pub const fn lookup(c: char) -> bool {
super::bitset_search(
c as u32,
&BITSET_CHUNKS_MAP,
Expand Down Expand Up @@ -454,14 +460,14 @@ pub mod n {

#[rustfmt::skip]
pub mod uppercase {
static BITSET_CHUNKS_MAP: [u8; 125] = [
const BITSET_CHUNKS_MAP: &'static [u8; 125] = &[
12, 15, 6, 6, 0, 6, 6, 2, 4, 11, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 5, 6, 14, 6, 10, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6,
6, 6, 9, 6, 3,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
const BITSET_INDEX_CHUNKS: &'static [[u8; 16]; 17] = &[
[43, 43, 5, 34, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 5, 1],
[43, 43, 5, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
[43, 43, 39, 43, 43, 43, 43, 43, 17, 17, 62, 17, 42, 29, 24, 23],
Expand All @@ -480,7 +486,7 @@ pub mod uppercase {
[57, 19, 2, 18, 10, 47, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
[57, 37, 17, 27, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
];
static BITSET_CANONICAL: [u64; 43] = [
const BITSET_CANONICAL: &'static [u64; 43] = &[
0b0000011111111111111111111111111000000000000000000000000000000000,
0b0000000000111111111111111111111111111111111111111111111111111111,
0b0101010101010101010101010101010101010101010101010101010000000001,
Expand Down Expand Up @@ -525,13 +531,14 @@ pub mod uppercase {
0b1111011111111111000000000000000000000000000000000000000000000000,
0b1111111100000000111111110000000000111111000000001111111100000000,
];
static BITSET_MAPPING: [(u8, u8); 25] = [
const BITSET_MAPPING: &'static [(u8, u8); 25] = &[
(0, 187), (0, 177), (0, 171), (0, 167), (0, 164), (0, 32), (0, 47), (0, 51), (0, 121),
(0, 117), (0, 109), (1, 150), (1, 148), (1, 142), (1, 134), (1, 131), (1, 64), (2, 164),
(2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
];

pub fn lookup(c: char) -> bool {
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
pub const fn lookup(c: char) -> bool {
super::bitset_search(
c as u32,
&BITSET_CHUNKS_MAP,
Expand Down
15 changes: 10 additions & 5 deletions src/tools/unicode-table-generator/src/range_search.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline(always)]
fn bitset_search<
const fn bitset_search<
const N: usize,
const CHUNK_SIZE: usize,
const N1: usize,
Expand All @@ -15,14 +16,18 @@ fn bitset_search<
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
v
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
chunk_idx_map[chunk_map_idx]
} else {
return false;
};
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
let word = if let Some(word) = bitset_canonical.get(idx) {
*word
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let word = if idx < bitset_canonical.len() {
bitset_canonical[idx]
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
let mut word = bitset_canonical[real_idx as usize];
Expand Down
15 changes: 10 additions & 5 deletions src/tools/unicode-table-generator/src/raw_emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@ impl RawEmitter {

writeln!(
&mut self.file,
"static BITSET_CANONICAL: [u64; {}] = [{}];",
"const BITSET_CANONICAL: &'static [u64; {}] = &[{}];",
canonicalized.canonical_words.len(),
fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))),
)
.unwrap();
self.bytes_used += 8 * canonicalized.canonical_words.len();
writeln!(
&mut self.file,
"static BITSET_MAPPING: [(u8, u8); {}] = [{}];",
"const BITSET_MAPPING: &'static [(u8, u8); {}] = &[{}];",
canonicalized.canonicalized_words.len(),
fmt_list(&canonicalized.canonicalized_words),
)
Expand All @@ -96,7 +96,12 @@ impl RawEmitter {

self.blank_line();

writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
writeln!(
&mut self.file,
r#"#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]"#
)
.unwrap();
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
writeln!(&mut self.file, " super::bitset_search(",).unwrap();
writeln!(&mut self.file, " c as u32,").unwrap();
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
Expand Down Expand Up @@ -130,15 +135,15 @@ impl RawEmitter {

writeln!(
&mut self.file,
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
"const BITSET_CHUNKS_MAP: &'static [u8; {}] = &[{}];",
chunk_indices.len(),
fmt_list(&chunk_indices),
)
.unwrap();
self.bytes_used += chunk_indices.len();
writeln!(
&mut self.file,
"static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];",
"const BITSET_INDEX_CHUNKS: &'static [[u8; {}]; {}] = &[{}];",
chunk_length,
chunks.len(),
fmt_list(chunks.iter()),
Expand Down

0 comments on commit d9bba11

Please sign in to comment.