Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make char::is_lowercase and char::is_uppercase const #101401

Merged
merged 2 commits into from
Sep 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -746,10 +746,19 @@ impl char {
/// assert!(!'中'.is_lowercase());
/// assert!(!' '.is_lowercase());
/// ```
///
/// In a const context:
///
/// ```
/// #![feature(const_unicode_case_lookup)]
/// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
/// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
/// ```
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline]
pub fn is_lowercase(self) -> bool {
pub const fn is_lowercase(self) -> bool {
match self {
'a'..='z' => true,
c => c > '\x7f' && unicode::Lowercase(c),
Expand Down Expand Up @@ -779,10 +788,19 @@ impl char {
/// assert!(!'中'.is_uppercase());
/// assert!(!' '.is_uppercase());
/// ```
///
/// In a const context:
///
/// ```
/// #![feature(const_unicode_case_lookup)]
/// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
/// assert!(CAPITAL_DELTA_IS_UPPERCASE);
/// ```
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline]
pub fn is_uppercase(self) -> bool {
pub const fn is_uppercase(self) -> bool {
match self {
'A'..='Z' => true,
c => c > '\x7f' && unicode::Uppercase(c),
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
#![feature(const_type_id)]
#![feature(const_type_name)]
#![feature(const_default_impls)]
#![feature(const_unicode_case_lookup)]
#![feature(const_unsafecell_get_mut)]
#![feature(core_panic)]
#![feature(duration_consts_float)]
Expand Down
37 changes: 22 additions & 15 deletions library/core/src/unicode/unicode_data.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
//! This file is generated by src/tools/unicode-table-generator; do not edit manually!

#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline(always)]
fn bitset_search<
const fn bitset_search<
const N: usize,
const CHUNK_SIZE: usize,
const N1: usize,
Expand All @@ -17,14 +18,18 @@ fn bitset_search<
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
v
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
chunk_idx_map[chunk_map_idx]
} else {
return false;
};
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
let word = if let Some(word) = bitset_canonical.get(idx) {
*word
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let word = if idx < bitset_canonical.len() {
bitset_canonical[idx]
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
let mut word = bitset_canonical[real_idx as usize];
Expand Down Expand Up @@ -318,14 +323,14 @@ pub mod grapheme_extend {

#[rustfmt::skip]
pub mod lowercase {
static BITSET_CHUNKS_MAP: [u8; 123] = [
const BITSET_CHUNKS_MAP: &'static [u8; 123] = &[
14, 17, 0, 0, 9, 0, 0, 12, 13, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0,
3, 0, 0, 7,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
const BITSET_INDEX_CHUNKS: &'static [[u8; 16]; 19] = &[
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 55, 0],
Expand All @@ -346,7 +351,7 @@ pub mod lowercase {
[16, 49, 2, 20, 66, 9, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[63, 39, 54, 12, 73, 61, 18, 1, 6, 62, 71, 19, 68, 69, 3, 44],
];
static BITSET_CANONICAL: [u64; 55] = [
const BITSET_CANONICAL: &'static [u64; 55] = &[
0b0000000000000000000000000000000000000000000000000000000000000000,
0b1111111111111111110000000000000000000000000011111111111111111111,
0b1010101010101010101010101010101010101010101010101010100000000010,
Expand Down Expand Up @@ -403,13 +408,14 @@ pub mod lowercase {
0b1110011111111111111111111111111111111111111111110000000000000000,
0b1110101111000000000000000000000000001111111111111111111111111100,
];
static BITSET_MAPPING: [(u8, u8); 20] = [
const BITSET_MAPPING: &'static [(u8, u8); 20] = &[
(0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70),
(1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187),
(6, 132), (7, 93),
];

pub fn lookup(c: char) -> bool {
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
pub const fn lookup(c: char) -> bool {
super::bitset_search(
c as u32,
&BITSET_CHUNKS_MAP,
Expand Down Expand Up @@ -454,14 +460,14 @@ pub mod n {

#[rustfmt::skip]
pub mod uppercase {
static BITSET_CHUNKS_MAP: [u8; 125] = [
const BITSET_CHUNKS_MAP: &'static [u8; 125] = &[
12, 15, 6, 6, 0, 6, 6, 2, 4, 11, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 5, 6, 14, 6, 10, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6,
6, 6, 9, 6, 3,
];
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
const BITSET_INDEX_CHUNKS: &'static [[u8; 16]; 17] = &[
[43, 43, 5, 34, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 5, 1],
[43, 43, 5, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
[43, 43, 39, 43, 43, 43, 43, 43, 17, 17, 62, 17, 42, 29, 24, 23],
Expand All @@ -480,7 +486,7 @@ pub mod uppercase {
[57, 19, 2, 18, 10, 47, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
[57, 37, 17, 27, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43],
];
static BITSET_CANONICAL: [u64; 43] = [
const BITSET_CANONICAL: &'static [u64; 43] = &[
0b0000011111111111111111111111111000000000000000000000000000000000,
0b0000000000111111111111111111111111111111111111111111111111111111,
0b0101010101010101010101010101010101010101010101010101010000000001,
Expand Down Expand Up @@ -525,13 +531,14 @@ pub mod uppercase {
0b1111011111111111000000000000000000000000000000000000000000000000,
0b1111111100000000111111110000000000111111000000001111111100000000,
];
static BITSET_MAPPING: [(u8, u8); 25] = [
const BITSET_MAPPING: &'static [(u8, u8); 25] = &[
(0, 187), (0, 177), (0, 171), (0, 167), (0, 164), (0, 32), (0, 47), (0, 51), (0, 121),
(0, 117), (0, 109), (1, 150), (1, 148), (1, 142), (1, 134), (1, 131), (1, 64), (2, 164),
(2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
];

pub fn lookup(c: char) -> bool {
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
pub const fn lookup(c: char) -> bool {
super::bitset_search(
c as u32,
&BITSET_CHUNKS_MAP,
Expand Down
15 changes: 10 additions & 5 deletions src/tools/unicode-table-generator/src/range_search.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
#[inline(always)]
fn bitset_search<
const fn bitset_search<
const N: usize,
const CHUNK_SIZE: usize,
const N1: usize,
Expand All @@ -15,14 +16,18 @@ fn bitset_search<
let bucket_idx = (needle / 64) as usize;
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
let chunk_piece = bucket_idx % CHUNK_SIZE;
let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) {
v
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
chunk_idx_map[chunk_map_idx]
} else {
return false;
};
let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
let word = if let Some(word) = bitset_canonical.get(idx) {
*word
// FIXME: const-hack: Revert to `slice::get` after `const_slice_index`
// feature stabilizes.
let word = if idx < bitset_canonical.len() {
bitset_canonical[idx]
} else {
let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
let mut word = bitset_canonical[real_idx as usize];
Expand Down
15 changes: 10 additions & 5 deletions src/tools/unicode-table-generator/src/raw_emitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@ impl RawEmitter {

writeln!(
&mut self.file,
"static BITSET_CANONICAL: [u64; {}] = [{}];",
"const BITSET_CANONICAL: &'static [u64; {}] = &[{}];",
canonicalized.canonical_words.len(),
fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))),
)
.unwrap();
self.bytes_used += 8 * canonicalized.canonical_words.len();
writeln!(
&mut self.file,
"static BITSET_MAPPING: [(u8, u8); {}] = [{}];",
"const BITSET_MAPPING: &'static [(u8, u8); {}] = &[{}];",
canonicalized.canonicalized_words.len(),
fmt_list(&canonicalized.canonicalized_words),
)
Expand All @@ -96,7 +96,12 @@ impl RawEmitter {

self.blank_line();

writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
writeln!(
&mut self.file,
r#"#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]"#
)
.unwrap();
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
writeln!(&mut self.file, " super::bitset_search(",).unwrap();
writeln!(&mut self.file, " c as u32,").unwrap();
writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap();
Expand Down Expand Up @@ -130,15 +135,15 @@ impl RawEmitter {

writeln!(
&mut self.file,
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
"const BITSET_CHUNKS_MAP: &'static [u8; {}] = &[{}];",
chunk_indices.len(),
fmt_list(&chunk_indices),
)
.unwrap();
self.bytes_used += chunk_indices.len();
writeln!(
&mut self.file,
"static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];",
"const BITSET_INDEX_CHUNKS: &'static [[u8; {}]; {}] = &[{}];",
chunk_length,
chunks.len(),
fmt_list(chunks.iter()),
Expand Down