Skip to content

Commit b965196

Browse files
authored
Rollup merge of #72413 - CAD97:char-range, r=dtolnay
impl Step for char (make Range*<char> iterable) [[irlo thread]](https://internals.rust-lang.org/t/mini-rfc-make-range-char-work/12392?u=cad97) [[godbolt asm example]](https://rust.godbolt.org/z/fdveKo) Add an implementation of the `Step` trait for `char`, which has the effect of making `RangeInclusive<char>` (and the other range types) iterable. I've written the surrogate range bounds as literal magic numbers here rather than as e.g. a `const SURROGATE_RANGE = 0xD800..0xE000`, because these numbers appear to be used as magic numbers elsewhere and no constants exist for them yet. These files definitely aren't where surrogate range constants should live. `ExactSizeIterator` is not implemented because `0x10FFFF` does not fit in a `usize` on 16-bit targets (where `usize` is only as wide as `u16`). However, given we already provide some `ExactSizeIterator` impls that are not correct on 16-bit targets, we might still want to consider providing it for `Range<char>` and `RangeInclusive<char>`, as it is definitely _very_ convenient. (At the very least, we want to make sure `.count()` doesn't bother iterating the range.) The second commit in this PR changes a call to `Step::forward` to use `Step::forward_unchecked` in `RangeInclusive::next`. This is because without this patch, iteration over all codepoints (`'\0'..=char::MAX`) does not successfully optimize out the panicking branch. This was mentioned in the PR that updated `Step` to its current design, but was deemed not yet necessary as it did not impact codegen for integral types. More of the `Step` method calls in the `Range*` implementations should probably be checked to see whether they can use the `_unchecked` version as (if) we open up `Step` to being implemented on more types. --- cc @rust-lang/libs, this is insta-stable and a fairly significant addition to `Range*`'s capabilities; this is the first instance of a non-contiguous domain being iterable with `Range` (or, well, anything other than primitive integers). 
I don't think this needs a full RFC, but it should definitely get some decent eyes on it.
2 parents de561a9 + cd6a8ca commit b965196

File tree

2 files changed

+85
-1
lines changed

2 files changed

+85
-1
lines changed

src/libcore/iter/range.rs

+73-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::char;
12
use crate::convert::TryFrom;
23
use crate::mem;
34
use crate::ops::{self, Add, Sub, Try};
@@ -400,6 +401,73 @@ step_integer_impls! {
400401
wider than usize: [u32 i32], [u64 i64], [u128 i128];
401402
}
402403

404+
#[unstable(feature = "step_trait", reason = "recently redesigned", issue = "42168")]
405+
unsafe impl Step for char {
406+
#[inline]
407+
fn steps_between(&start: &char, &end: &char) -> Option<usize> {
408+
let start = start as u32;
409+
let end = end as u32;
410+
if start <= end {
411+
let count = end - start;
412+
if start < 0xD800 && 0xE000 <= end {
413+
usize::try_from(count - 0x800).ok()
414+
} else {
415+
usize::try_from(count).ok()
416+
}
417+
} else {
418+
None
419+
}
420+
}
421+
422+
#[inline]
423+
fn forward_checked(start: char, count: usize) -> Option<char> {
424+
let start = start as u32;
425+
let mut res = Step::forward_checked(start, count)?;
426+
if start < 0xD800 && 0xD800 <= res {
427+
res = Step::forward_checked(res, 0x800)?;
428+
}
429+
if res <= char::MAX as u32 {
430+
// SAFETY: res is a valid unicode scalar
431+
// (below 0x110000 and not in 0xD800..0xE000)
432+
Some(unsafe { char::from_u32_unchecked(res) })
433+
} else {
434+
None
435+
}
436+
}
437+
438+
#[inline]
439+
fn backward_checked(start: char, count: usize) -> Option<char> {
440+
let start = start as u32;
441+
let mut res = Step::backward_checked(start, count)?;
442+
if start >= 0xE000 && 0xE000 > res {
443+
res = Step::backward_checked(res, 0x800)?;
444+
}
445+
// SAFETY: res is a valid unicode scalar
446+
// (below 0x110000 and not in 0xD800..0xE000)
447+
Some(unsafe { char::from_u32_unchecked(res) })
448+
}
449+
450+
#[inline]
451+
unsafe fn forward_unchecked(start: char, count: usize) -> char {
452+
let start = start as u32;
453+
let mut res = Step::forward_unchecked(start, count);
454+
if start < 0xD800 && 0xD800 <= res {
455+
res = Step::forward_unchecked(res, 0x800);
456+
}
457+
char::from_u32_unchecked(res)
458+
}
459+
460+
#[inline]
461+
unsafe fn backward_unchecked(start: char, count: usize) -> char {
462+
let start = start as u32;
463+
let mut res = Step::backward_unchecked(start, count);
464+
if start >= 0xE000 && 0xE000 > res {
465+
res = Step::backward_unchecked(res, 0x800);
466+
}
467+
char::from_u32_unchecked(res)
468+
}
469+
}
470+
403471
macro_rules! range_exact_iter_impl {
404472
($($t:ty)*) => ($(
405473
#[stable(feature = "rust1", since = "1.0.0")]
@@ -582,7 +650,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
582650
}
583651
let is_iterating = self.start < self.end;
584652
Some(if is_iterating {
585-
let n = Step::forward(self.start.clone(), 1);
653+
// SAFETY: just checked precondition
654+
// We use the unchecked version here, because
655+
// otherwise `for _ in '\0'..=char::MAX`
656+
// does not successfully remove panicking code.
657+
let n = unsafe { Step::forward_unchecked(self.start.clone(), 1) };
586658
mem::replace(&mut self.start, n)
587659
} else {
588660
self.exhausted = true;

src/libcore/tests/iter.rs

+12
Original file line numberDiff line numberDiff line change
@@ -1932,6 +1932,18 @@ fn test_range() {
19321932
);
19331933
}
19341934

1935+
#[test]
fn test_char_range() {
    use std::char;

    // Reference sequence: every `u32` up to `char::MAX` that converts to a
    // `char`, i.e. all scalar values in ascending order.
    let scalars = || (0..=char::MAX as u32).filter_map(char::from_u32);

    // Full-range iteration must match the reference in both directions.
    assert!(('\0'..=char::MAX).eq(scalars()));
    assert!(('\0'..=char::MAX).rev().eq(scalars().rev()));

    // Ranges straddling the surrogate gap must not count the surrogates.
    let closed = '\u{D7FF}'..='\u{E000}';
    assert_eq!(closed.clone().count(), 2);
    assert_eq!(closed.size_hint(), (2, Some(2)));

    let half_open = '\u{D7FF}'..'\u{E000}';
    assert_eq!(half_open.clone().count(), 1);
    assert_eq!(half_open.size_hint(), (1, Some(1)));
}
1946+
19351947
#[test]
19361948
fn test_range_exhaustion() {
19371949
let mut r = 10..10;

0 commit comments

Comments
 (0)