Skip to content

Add APIs for dealing with titlecase #122668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@
#![feature(std_internals)]
#![feature(str_internals)]
#![feature(strict_provenance)]
#![feature(titlecase)]
#![feature(trusted_fused)]
#![feature(trusted_len)]
#![feature(trusted_random_access)]
Expand Down
4 changes: 2 additions & 2 deletions library/alloc/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,9 @@ impl str {
}

fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
use core::unicode::{Case_Ignorable, Cased};
use core::unicode::Case_Ignorable;
match iter.skip_while(|&c| Case_Ignorable(c)).next() {
Some(c) => Cased(c),
Some(c) => c.is_cased(),
None => false,
}
}
Expand Down
258 changes: 245 additions & 13 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,73 @@ impl char {
#[inline]
pub fn is_alphabetic(self) -> bool {
match self {
'a'..='z' | 'A'..='Z' => true,
c => c > '\x7f' && unicode::Alphabetic(c),
'A'..='Z' | 'a'..='z' => true,
'\0'..='\u{A9}' => false,
_ => unicode::Alphabetic(self),
}
}

/// Reports whether this `char` carries the Unicode `Cased` property.
///
/// Cased characters are exactly those that are uppercase, lowercase, or titlecase.
///
/// The `Cased` property is covered in Chapter 3 (Conformance) of the [Unicode Standard];
/// its values are listed in the [Unicode Character Database][ucd] file
/// [`DerivedCoreProperties.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(titlecase)]
/// assert!('A'.is_cased());
/// assert!('a'.is_cased());
/// assert!(!'京'.is_cased());
/// ```
#[must_use]
#[unstable(feature = "titlecase", issue = "none")]
#[inline]
pub fn is_cased(self) -> bool {
    // ASCII letters are always cased; answer without consulting the Unicode tables.
    if matches!(self, 'A'..='Z' | 'a'..='z') {
        return true;
    }
    // Every other code point up to U+00A9 is reported as uncased, so only code points
    // above that bound reach the table lookup.
    self > '\u{A9}' && unicode::Cased(self)
}

/// Classifies this character by its case.
///
/// The result is:
///
/// * [`Some(CharCase::Upper)`][`CharCase::Upper`] when [`self.is_uppercase()`][`char::is_uppercase`],
/// * [`Some(CharCase::Lower)`][`CharCase::Lower`] when [`self.is_lowercase()`][`char::is_lowercase`],
/// * [`Some(CharCase::Title)`][`CharCase::Title`] when [`self.is_titlecase()`][`char::is_titlecase`],
/// * `None` when [`!self.is_cased()`][`char::is_cased`].
///
/// # Examples
///
/// ```
/// #![feature(titlecase)]
/// use core::char::CharCase;
/// assert_eq!('a'.case(), Some(CharCase::Lower));
/// assert_eq!('δ'.case(), Some(CharCase::Lower));
/// assert_eq!('A'.case(), Some(CharCase::Upper));
/// assert_eq!('Δ'.case(), Some(CharCase::Upper));
/// assert_eq!('ǅ'.case(), Some(CharCase::Title));
/// assert_eq!('中'.case(), None);
/// ```
#[must_use]
#[unstable(feature = "titlecase", issue = "none")]
#[inline]
pub fn case(self) -> Option<CharCase> {
    // ASCII letters are classified directly.
    if matches!(self, 'A'..='Z') {
        return Some(CharCase::Upper);
    }
    if matches!(self, 'a'..='z') {
        return Some(CharCase::Lower);
    }
    // Every remaining code point up to U+00A9 is treated as uncased, as is anything
    // that `is_cased` rejects.
    if self <= '\u{A9}' || !self.is_cased() {
        return None;
    }
    if self.is_lowercase() {
        Some(CharCase::Lower)
    } else if self.is_uppercase() {
        Some(CharCase::Upper)
    } else {
        // Cased, yet neither lowercase nor uppercase: reported as titlecase.
        Some(CharCase::Title)
    }
}

Expand Down Expand Up @@ -785,7 +850,41 @@ impl char {
pub const fn is_lowercase(self) -> bool {
match self {
'a'..='z' => true,
c => c > '\x7f' && unicode::Lowercase(c),
'\0'..='\u{A9}' => false,
_ => unicode::Lowercase(self),
}
}

/// Reports whether this `char` belongs to the general category of titlecase letters.
///
/// Titlecase letters are the code points whose general category is `Lt`. They are described
/// in Chapter 4 (Character Properties) of the [Unicode Standard] and specified in the
/// [Unicode Character Database][ucd] file [`UnicodeData.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(titlecase)]
/// assert!('ǅ'.is_titlecase());
/// assert!('ᾨ'.is_titlecase());
/// assert!(!'D'.is_titlecase());
/// assert!(!'z'.is_titlecase());
/// assert!(!'中'.is_titlecase());
/// assert!(!' '.is_titlecase());
/// ```
#[must_use]
#[unstable(feature = "titlecase", issue = "none")]
#[inline]
pub fn is_titlecase(self) -> bool {
    // Every code point up to and including U+01C4 is rejected outright.
    if self <= '\u{01C4}' {
        return false;
    }
    // Above that bound, a titlecase letter is one that is cased but is neither
    // lowercase nor uppercase.
    self.is_cased() && !(self.is_lowercase() || self.is_uppercase())
}

Expand Down Expand Up @@ -827,7 +926,8 @@ impl char {
pub const fn is_uppercase(self) -> bool {
match self {
'A'..='Z' => true,
c => c > '\x7f' && unicode::Uppercase(c),
'\0'..='\u{BF}' => false,
_ => unicode::Uppercase(self),
}
}

Expand Down Expand Up @@ -859,7 +959,8 @@ impl char {
pub fn is_whitespace(self) -> bool {
match self {
' ' | '\x09'..='\x0d' => true,
c => c > '\x7f' && unicode::White_Space(c),
'\0'..='\u{84}' => false,
_ => unicode::White_Space(self),
}
}

Expand Down Expand Up @@ -927,7 +1028,7 @@ impl char {
#[must_use]
#[inline]
pub(crate) fn is_grapheme_extended(self) -> bool {
self > '\x7f' && unicode::Grapheme_Extend(self)
self > '\u{02FF}' && unicode::Grapheme_Extend(self)
}

/// Returns `true` if this `char` has one of the general categories for numbers.
Expand Down Expand Up @@ -969,12 +1070,14 @@ impl char {
pub fn is_numeric(self) -> bool {
match self {
'0'..='9' => true,
c => c > '\x7f' && unicode::N(c),
'\0'..='\u{B1}' => false,
_ => unicode::N(self),
}
}

/// Returns an iterator that yields the lowercase mapping of this `char` as one or more
/// `char`s.
/// `char`s. The iterator also has implementations of [`Display`][core::fmt::Display]
/// and [`PartialEq`].
///
/// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
///
Expand Down Expand Up @@ -1032,16 +1135,138 @@ impl char {
/// // convert into themselves.
/// assert_eq!('山'.to_lowercase().to_string(), "山");
/// ```
#[must_use = "this returns the lowercase character as a new iterator, \
///
/// Check if a string is in lowercase:
///
/// ```
/// let s = "abcde\u{0301} 山";
/// assert!(s.chars().all(|c| c.to_lowercase() == c));
/// ```
#[must_use = "this returns the lowercased character as a new iterator, \
without modifying the original"]
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn to_lowercase(self) -> ToLowercase {
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
}

/// Returns an iterator over the titlecase mapping of this `char`, which consists of one or
/// more `char`s. The returned iterator also implements [`Display`][core::fmt::Display]
/// and [`PartialEq`].
///
/// The mapping is chosen as follows:
///
/// * If this `char` does not have a titlecase mapping, the iterator yields the same `char`.
/// * If this `char` has a one-to-one titlecase mapping given by the [Unicode Character
///   Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
/// * If this `char` requires special considerations (e.g. multiple `char`s), the iterator
///   yields the `char`(s) given by [`SpecialCasing.txt`].
///
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
///
/// The mapping is unconditional and untailored: the conversion depends on neither the
/// surrounding context nor the language.
///
/// Within the [Unicode Standard], case mapping in general is discussed in Chapter 4
/// (Character Properties), and the default algorithm for case conversion in Chapter 3
/// (Conformance).
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
///
/// # Examples
///
/// As an iterator:
///
/// ```
/// #![feature(titlecase)]
/// for c in 'ß'.to_titlecase() {
///     print!("{c}");
/// }
/// println!();
/// ```
///
/// Using `println!` directly:
///
/// ```
/// #![feature(titlecase)]
/// println!("{}", 'ß'.to_titlecase());
/// ```
///
/// Both are equivalent to:
///
/// ```
/// #![feature(titlecase)]
/// println!("Ss");
/// ```
///
/// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
///
/// ```
/// #![feature(titlecase)]
/// assert_eq!('c'.to_titlecase().to_string(), "C");
///
/// // Sometimes the result is more than one character:
/// assert_eq!('ß'.to_titlecase().to_string(), "Ss");
///
/// // Characters that do not have separate cased forms
/// // convert into themselves.
/// assert_eq!('山'.to_titlecase().to_string(), "山");
/// ```
///
/// Check if a word is in titlecase:
///
/// ```
/// #![feature(titlecase)]
/// let word = "Dross";
/// let mut chars = word.chars();
/// let first_cased_char = chars.find(|c| c.is_cased());
/// let word_is_in_titlecase = if let Some(f) = first_cased_char {
///     f.to_titlecase() == f && chars.all(|c| c.to_lowercase() == c)
/// } else {
///     true
/// };
/// assert!(word_is_in_titlecase);
/// ```
///
/// # Note on locale
///
/// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
///
/// * 'Dotless': I / ı, sometimes written ï
/// * 'Dotted': İ / i
///
/// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
///
/// ```
/// #![feature(titlecase)]
/// let title_i = 'i'.to_titlecase().to_string();
/// ```
///
/// The value of `title_i` here relies on the language of the text: if we're
/// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
/// be `"İ"`. `to_titlecase()` does not take this into account, and so:
///
/// ```
/// #![feature(titlecase)]
/// let title_i = 'i'.to_titlecase().to_string();
///
/// assert_eq!(title_i, "I");
/// ```
///
/// holds across languages.
#[must_use = "this returns the titlecased character as a new iterator, \
without modifying the original"]
#[unstable(feature = "titlecase", issue = "none")]
#[inline]
pub fn to_titlecase(self) -> ToTitlecase {
    // Look up the titlecase mapping first, then wrap it in the public iterator type.
    let mapping = conversions::to_title(self);
    ToTitlecase(CaseMappingIter::new(mapping))
}

/// Returns an iterator that yields the uppercase mapping of this `char` as one or more
/// `char`s.
/// `char`s. The iterator also has implementations of [`Display`][core::fmt::Display]
/// and [`PartialEq`].
///
/// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
///
Expand Down Expand Up @@ -1100,9 +1325,16 @@ impl char {
/// assert_eq!('山'.to_uppercase().to_string(), "山");
/// ```
///
/// Check if a string is in uppercase:
///
/// ```
/// let s = "ABCDE\u{0301} 山";
/// assert!(s.chars().all(|c| c.to_uppercase() == c));
/// ```
///
/// # Note on locale
///
/// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
/// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two:
///
/// * 'Dotless': I / ı, sometimes written ï
/// * 'Dotted': İ / i
Expand All @@ -1114,7 +1346,7 @@ impl char {
/// ```
///
/// The value of `upper_i` here relies on the language of the text: if we're
/// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
/// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should
/// be `"İ"`. `to_uppercase()` does not take this into account, and so:
///
/// ```
Expand All @@ -1124,7 +1356,7 @@ impl char {
/// ```
///
/// holds across languages.
#[must_use = "this returns the uppercase character as a new iterator, \
#[must_use = "this returns the uppercased character as a new iterator, \
without modifying the original"]
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
Expand Down
Loading
Loading