Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

library: core::str::lines: Fix handling of trailing bare CR #91191

Closed
wants to merge 5 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -174,6 +174,7 @@
#![feature(intra_doc_pointers)]
#![feature(intrinsics)]
#![feature(lang_items)]
#![feature(let_else)]
#![feature(link_llvm_intrinsics)]
#![feature(llvm_asm)]
#![feature(min_specialization)]
4 changes: 2 additions & 2 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@ use super::from_utf8_unchecked;
use super::pattern::Pattern;
use super::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
use super::validations::{next_code_point, next_code_point_reverse, utf8_is_cont_byte};
use super::LinesAnyMap;
use super::LinesMap;
use super::{BytesIsNotEmpty, UnsafeBytesToStr};
use super::{CharEscapeDebugContinue, CharEscapeDefault, CharEscapeUnicode};
use super::{IsAsciiWhitespace, IsNotEmpty, IsWhitespace};
@@ -1095,7 +1095,7 @@ generate_pattern_iterators! {
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use = "iterators are lazy and do nothing unless consumed"]
#[derive(Clone, Debug)]
pub struct Lines<'a>(pub(super) Map<SplitTerminator<'a, char>, LinesAnyMap>);
pub struct Lines<'a>(pub(super) Map<SplitInclusive<'a, char>, LinesMap>);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
67 changes: 62 additions & 5 deletions library/core/src/str/mod.rs
Original file line number Diff line number Diff line change
@@ -899,10 +899,27 @@ impl str {
///
/// assert_eq!(None, lines.next());
/// ```
///
/// Handling of some edge cases:
///
/// ```
/// fn assert_splits_into(input: &str, expected: &[&str]) {
/// assert_eq!( input.lines().collect::<Vec<_>>(), expected );
/// }
///
// Note: there is another copy of this set of corner cases next to `std::io::BufRead::lines()`.
// The two functions should behave the same way; consider editing those other doctests too.
/// assert_splits_into("", &[]);
/// assert_splits_into("\n", &[""]);
/// assert_splits_into("\n2nd", &["", "2nd"]);
/// assert_splits_into("\r\n", &[""]);
/// assert_splits_into("bare\r", &["bare\r"]);
/// assert_splits_into("bare\rcr", &["bare\rcr"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn lines(&self) -> Lines<'_> {
Lines(self.split_terminator('\n').map(LinesAnyMap))
Lines(self.split_inclusive('\n').map(LinesMap))
}

/// An iterator over the lines of a string.
@@ -1833,6 +1850,48 @@ impl str {
self.trim_end_matches(|c: char| c.is_whitespace())
}

/// Returns a string slice with at most one trailing newline removed.
///
/// A 'newline' here is exactly a line feed (`0xA`), optionally preceded
/// by a carriage return (`0xD`): that is, `"\n"` or `"\r\n"`. A bare
/// trailing carriage return is not a newline and is left in place.
/// (This is the same definition as used by [`str::lines`]
/// and `std::io::BufRead::lines`.)
//
// The mention of `std::io::BufRead::lines` above is deliberately not a link.
// A reference definition such as:
// [`std::io::BufRead::lines`]: ../std/io/trait.BufRead.html#method.lines
// works in `core`, but fails with a broken link error in `std`, where
// this text is incorporated due to `String`'s `Deref`.
///
/// # Examples
///
/// ```
/// #![feature(trim_newline)]
/// use std::fmt::Write as _;
///
/// assert_eq!("Text", "Text".trim_newline());
/// assert_eq!("Text", "Text\n".trim_newline());
/// assert_eq!("Text", "Text\r\n".trim_newline());
/// assert_eq!("Text\r", "Text\r".trim_newline());
/// assert_eq!("Text\n", "Text\n\n".trim_newline());
/// assert_eq!("Text\n\r", "Text\n\r".trim_newline()); // LF CR is not a valid newline
///
/// let mut s = String::new();
/// writeln!(s, " Hi! ").unwrap();
/// assert_eq!(" Hi! ", s.trim_newline());
/// assert_eq!(" Hi! ", s.trim_newline().trim_newline());
/// ```
#[inline]
#[must_use = "this returns the trimmed string as a new slice, \
without modifying the original"]
#[unstable(feature = "trim_newline", issue = "none")]
pub fn trim_newline(&self) -> &str {
    match self.strip_suffix('\n') {
        // No final line feed: there is no newline to remove.
        None => self,
        // Also drop a carriage return that directly precedes the line feed.
        Some(without_lf) => without_lf.strip_suffix('\r').unwrap_or(without_lf),
    }
}

/// Returns a string slice with leading whitespace removed.
///
/// 'Whitespace' is defined according to the terms of the Unicode Derived
@@ -2499,10 +2558,8 @@ impl Default for &mut str {
impl_fn_for_zst! {
/// A nameable, cloneable fn type
#[derive(Clone)]
struct LinesAnyMap impl<'a> Fn = |line: &'a str| -> &'a str {
let l = line.len();
if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
else { line }
struct LinesMap impl<'a> Fn = |line: &'a str| -> &'a str {
line.trim_newline()
};

#[derive(Clone)]
20 changes: 20 additions & 0 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
@@ -2281,6 +2281,26 @@ pub trait BufRead: Read {
/// assert_eq!(lines_iter.next(), None);
/// ```
///
/// Handling of some edge cases:
///
/// ```
/// use std::io::{BufRead, Cursor};
///
/// fn assert_splits_into(input: &str, expected: &[&str]) {
/// let got = Cursor::new(input).lines().collect::<Result<Vec<_>,_>>().unwrap();
/// assert_eq!(got, expected);
/// }
///
// Note: there is another copy of this set of corner cases, next to `core::str::lines()`.
// The two functions should behave the same way; consider editing those other doctests too.
/// assert_splits_into("", &[]);
/// assert_splits_into("\n", &[""]);
/// assert_splits_into("\n2nd", &["", "2nd"]);
/// assert_splits_into("\r\n", &[""]);
/// assert_splits_into("bare\r", &["bare\r"]);
/// assert_splits_into("bare\rcr", &["bare\rcr"]);
/// ```
///
/// # Errors
///
/// Each line of the iterator has the same error semantics as [`BufRead::read_line`].