Skip to content

Commit 05424c7

Browse files
committed
Improve docs for std::char
Part of #29428
1 parent edf2198 commit 05424c7

File tree

2 files changed

+169
-36
lines changed

2 files changed

+169
-36
lines changed

src/libcore/char.rs

+142-16
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,63 @@ const MAX_THREE_B: u32 = 0x10000;
6363
Cn Unassigned a reserved unassigned code point or a noncharacter
6464
*/
6565

66-
/// The highest valid code point
66+
/// The highest valid code point a `char` can have.
67+
///
68+
/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
69+
/// Point], but only ones within a certain range. `MAX` is the highest valid
70+
/// code point that's a valid [Unicode Scalar Value].
71+
///
72+
/// [`char`]: primitive.char.html
73+
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
74+
/// [Code Point]: http://www.unicode.org/glossary/#code_point
6775
#[stable(feature = "rust1", since = "1.0.0")]
6876
pub const MAX: char = '\u{10ffff}';
6977

70-
/// Converts a `u32` to an `Option<char>`.
78+
/// Converts a `u32` to a `char`.
79+
///
80+
/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
81+
/// [`as`]:
82+
///
83+
/// ```
84+
/// let c = '💯';
85+
/// let i = c as u32;
86+
///
87+
/// assert_eq!(128175, i);
88+
/// ```
89+
///
90+
/// However, the reverse is not true: not all valid [`u32`]s are valid
91+
/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
92+
/// for a [`char`].
93+
///
94+
/// [`char`]: primitive.char.html
95+
/// [`u32`]: primitive.u32.html
96+
/// [`as`]: ../book/casting-between-types.html#as
97+
///
98+
/// For an unsafe version of this function which ignores these checks, see
99+
/// [`from_u32_unchecked()`].
100+
///
101+
/// [`from_u32_unchecked()`]: fn.from_u32_unchecked.html
71102
///
72103
/// # Examples
73104
///
105+
/// Basic usage:
106+
///
74107
/// ```
75108
/// use std::char;
76109
///
77-
/// assert_eq!(char::from_u32(0x2764), Some('❤'));
78-
/// assert_eq!(char::from_u32(0x110000), None); // invalid character
110+
/// let c = char::from_u32(0x2764);
111+
///
112+
/// assert_eq!(Some('❤'), c);
113+
/// ```
114+
///
115+
/// Returning `None` when the input is not a valid [`char`]:
116+
///
117+
/// ```
118+
/// use std::char;
119+
///
120+
/// let c = char::from_u32(0x110000);
121+
///
122+
/// assert_eq!(None, c);
79123
/// ```
80124
#[inline]
81125
#[stable(feature = "rust1", since = "1.0.0")]
@@ -88,33 +132,104 @@ pub fn from_u32(i: u32) -> Option<char> {
88132
}
89133
}
90134

91-
/// Converts a `u32` to an `char`, not checking whether it is a valid unicode
92-
/// codepoint.
135+
/// Converts a `u32` to a `char`, ignoring validity.
136+
///
137+
/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
138+
/// [`as`]:
139+
///
140+
/// ```
141+
/// let c = '💯';
142+
/// let i = c as u32;
143+
///
144+
/// assert_eq!(128175, i);
145+
/// ```
146+
///
147+
/// However, the reverse is not true: not all valid [`u32`]s are valid
148+
/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
149+
/// [`char`], possibly creating an invalid one.
150+
///
151+
/// [`char`]: primitive.char.html
152+
/// [`u32`]: primitive.u32.html
153+
/// [`as`]: ../book/casting-between-types.html#as
154+
///
155+
/// # Safety
156+
///
157+
/// This function is unsafe, as it may construct invalid `char` values.
158+
///
159+
/// For a safe version of this function, see the [`from_u32()`] function.
160+
///
161+
/// [`from_u32()`]: fn.from_u32.html
162+
///
163+
/// # Examples
164+
///
165+
/// Basic usage:
166+
///
167+
/// ```
168+
/// use std::char;
169+
///
170+
/// let c = unsafe { char::from_u32_unchecked(0x2764) };
171+
///
172+
/// assert_eq!('❤', c);
173+
/// ```
93174
#[inline]
94175
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
95176
pub unsafe fn from_u32_unchecked(i: u32) -> char {
96177
transmute(i)
97178
}
98179

99-
/// Converts a number to the character representing it.
180+
/// Converts a digit in the given radix to a `char`.
100181
///
101-
/// # Return value
182+
/// A 'radix' here is sometimes also called a 'base'. A radix of two
183+
/// indicates a binary number, a radix of ten, decimal, and a radix of
184+
/// sixteen, hexadecimal, to give some common values. Arbitrary
185+
/// radices are supported.
102186
///
103-
/// Returns `Some(char)` if `num` represents one digit under `radix`,
104-
/// using one character of `0-9` or `a-z`, or `None` if it doesn't.
187+
/// `from_digit()` will return `None` if the input is not a digit in
188+
/// the given radix.
105189
///
106190
/// # Panics
107191
///
108-
/// Panics if given an `radix` > 36.
192+
/// Panics if given a radix larger than 36.
109193
///
110194
/// # Examples
111195
///
196+
/// Basic usage:
197+
///
112198
/// ```
113199
/// use std::char;
114200
///
115201
/// let c = char::from_digit(4, 10);
116202
///
117-
/// assert_eq!(c, Some('4'));
203+
/// assert_eq!(Some('4'), c);
204+
///
205+
/// // Decimal 11 is a single digit in base 16
206+
/// let c = char::from_digit(11, 16);
207+
///
208+
/// assert_eq!(Some('b'), c);
209+
/// ```
210+
///
211+
/// Returning `None` when the input is not a digit:
212+
///
213+
/// ```
214+
/// use std::char;
215+
///
216+
/// let c = char::from_digit(20, 10);
217+
///
218+
/// assert_eq!(None, c);
219+
/// ```
220+
///
221+
/// Passing a large radix, causing a panic:
222+
///
223+
/// ```
224+
/// use std::thread;
225+
/// use std::char;
226+
///
227+
/// let result = thread::spawn(|| {
228+
/// // this panics
229+
/// let c = char::from_digit(1, 37);
230+
/// }).join();
231+
///
232+
/// assert!(result.is_err());
118233
/// ```
119234
#[inline]
120235
#[stable(feature = "rust1", since = "1.0.0")]
@@ -287,8 +402,14 @@ pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
287402
}
288403
}
289404

290-
/// An iterator over the characters that represent a `char`, as escaped by
291-
/// Rust's unicode escaping rules.
405+
/// An iterator that yields the hexadecimal Unicode escape of a
406+
/// character, as `char`s.
407+
///
408+
/// This `struct` is created by the [`escape_unicode()`] method on [`char`]. See
409+
/// its documentation for more.
410+
///
411+
/// [`escape_unicode()`]: primitive.char.html#method.escape_unicode
412+
/// [`char`]: primitive.char.html
292413
#[derive(Clone)]
293414
#[stable(feature = "rust1", since = "1.0.0")]
294415
pub struct EscapeUnicode {
@@ -362,8 +483,13 @@ impl Iterator for EscapeUnicode {
362483
}
363484
}
364485

365-
/// An iterator over the characters that represent a `char`, escaped
366-
/// for maximum portability.
486+
/// An iterator that yields the literal escape code of a `char`.
487+
///
488+
/// This `struct` is created by the [`escape_default()`] method on [`char`]. See
489+
/// its documentation for more.
490+
///
491+
/// [`escape_default()`]: primitive.char.html#method.escape_default
492+
/// [`char`]: primitive.char.html
367493
#[derive(Clone)]
368494
#[stable(feature = "rust1", since = "1.0.0")]
369495
pub struct EscapeDefault {

src/librustc_unicode/char.rs

+27-20
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,23 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
//! Unicode scalar values
11+
//! A character type.
1212
//!
13-
//! This module provides the `CharExt` trait, as well as its
14-
//! implementation for the primitive `char` type, in order to allow
15-
//! basic character manipulation.
13+
//! The `char` type represents a single character. More specifically, since
14+
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
15+
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
16+
//! point]'.
1617
//!
17-
//! A `char` represents a
18-
//! *[Unicode scalar
19-
//! value](http://www.unicode.org/glossary/#unicode_scalar_value)*, as it can
20-
//! contain any Unicode code point except high-surrogate and low-surrogate code
21-
//! points.
18+
//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
19+
//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
2220
//!
23-
//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
24-
//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
25-
//! however the converse is not always true due to the above range limits
26-
//! and, as such, should be performed via the `from_u32` function.
21+
//! This module exists for technical reasons; the primary documentation for
22+
//! `char` is directly on [the `char` primitive type](../primitive.char.html)
23+
//! itself.
2724
//!
28-
//! *[See also the `char` primitive type](../primitive.char.html).*
25+
//! This module is the home of the iterator implementations for the iterators
26+
//! implemented on `char`, as well as some useful constants and conversion
27+
//! functions that convert various types to `char`.
2928
3029
#![stable(feature = "rust1", since = "1.0.0")]
3130

@@ -42,9 +41,13 @@ pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicod
4241
#[unstable(feature = "unicode", issue = "27783")]
4342
pub use tables::UNICODE_VERSION;
4443

45-
/// An iterator over the lowercase mapping of a given character, returned from
46-
/// the [`to_lowercase` method](../primitive.char.html#method.to_lowercase) on
47-
/// characters.
44+
/// An iterator that yields the lowercase equivalent of a `char`.
45+
///
46+
/// This `struct` is created by the [`to_lowercase()`] method on [`char`]. See
47+
/// its documentation for more.
48+
///
49+
/// [`to_lowercase()`]: primitive.char.html#method.to_lowercase
50+
/// [`char`]: primitive.char.html
4851
#[stable(feature = "rust1", since = "1.0.0")]
4952
pub struct ToLowercase(CaseMappingIter);
5053

@@ -56,9 +59,13 @@ impl Iterator for ToLowercase {
5659
}
5760
}
5861

59-
/// An iterator over the uppercase mapping of a given character, returned from
60-
/// the [`to_uppercase` method](../primitive.char.html#method.to_uppercase) on
61-
/// characters.
62+
/// An iterator that yields the uppercase equivalent of a `char`.
63+
///
64+
/// This `struct` is created by the [`to_uppercase()`] method on [`char`]. See
65+
/// its documentation for more.
66+
///
67+
/// [`to_uppercase()`]: primitive.char.html#method.to_uppercase
68+
/// [`char`]: primitive.char.html
6269
#[stable(feature = "rust1", since = "1.0.0")]
6370
pub struct ToUppercase(CaseMappingIter);
6471

0 commit comments

Comments
 (0)