Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid allocations in Display for OsStr and Path #42613

Merged
merged 2 commits into from
Jun 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 22 additions & 99 deletions src/liballoc/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ use core::hash;
use core::iter::{FromIterator, FusedIterator};
use core::ops::{self, Add, AddAssign, Index, IndexMut};
use core::ptr;
use core::str as core_str;
use core::str::pattern::Pattern;
use std_unicode::lossy;
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};

use borrow::{Cow, ToOwned};
Expand Down Expand Up @@ -533,111 +533,34 @@ impl String {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
let mut i;
match str::from_utf8(v) {
Ok(s) => return Cow::Borrowed(s),
Err(e) => i = e.valid_up_to(),
}
let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();

const TAG_CONT_U8: u8 = 128;
const REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
let total = v.len();
fn unsafe_get(xs: &[u8], i: usize) -> u8 {
unsafe { *xs.get_unchecked(i) }
}
fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 {
if i >= total { 0 } else { unsafe_get(xs, i) }
}
let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
let lossy::Utf8LossyChunk { valid, broken } = chunk;
if valid.len() == v.len() {
debug_assert!(broken.is_empty());
return Cow::Borrowed(valid);
}
(valid, broken)
} else {
return Cow::Borrowed("");
};

let mut res = String::with_capacity(total);
const REPLACEMENT: &'static str = "\u{FFFD}";

if i > 0 {
unsafe { res.as_mut_vec().extend_from_slice(&v[..i]) };
let mut res = String::with_capacity(v.len());
res.push_str(first_valid);
if !first_broken.is_empty() {
res.push_str(REPLACEMENT);
}

// subseqidx is the index of the first byte of the subsequence we're
// looking at. It's used to copy a bunch of contiguous good codepoints
// at once instead of copying them one by one.
let mut subseqidx = i;

while i < total {
let i_ = i;
let byte = unsafe_get(v, i);
i += 1;

macro_rules! error { () => ({
unsafe {
if subseqidx != i_ {
res.as_mut_vec().extend_from_slice(&v[subseqidx..i_]);
}
subseqidx = i;
res.as_mut_vec().extend_from_slice(REPLACEMENT);
}
})}

if byte < 128 {
// subseqidx handles this
} else {
let w = core_str::utf8_char_width(byte);

match w {
2 => {
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
}
3 => {
match (byte, safe_get(v, i, total)) {
(0xE0, 0xA0...0xBF) => (),
(0xE1...0xEC, 0x80...0xBF) => (),
(0xED, 0x80...0x9F) => (),
(0xEE...0xEF, 0x80...0xBF) => (),
_ => {
error!();
continue;
}
}
i += 1;
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
}
4 => {
match (byte, safe_get(v, i, total)) {
(0xF0, 0x90...0xBF) => (),
(0xF1...0xF3, 0x80...0xBF) => (),
(0xF4, 0x80...0x8F) => (),
_ => {
error!();
continue;
}
}
i += 1;
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
}
_ => {
error!();
continue;
}
}
for lossy::Utf8LossyChunk { valid, broken } in iter {
res.push_str(valid);
if !broken.is_empty() {
res.push_str(REPLACEMENT);
}
}
if subseqidx < total {
unsafe { res.as_mut_vec().extend_from_slice(&v[subseqidx..total]) };
}

Cow::Owned(res)
}

Expand Down
18 changes: 12 additions & 6 deletions src/libstd/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// except according to those terms.

use borrow::{Borrow, Cow};
use fmt::{self, Debug};
use fmt;
use mem;
use ops;
use cmp;
Expand Down Expand Up @@ -312,8 +312,8 @@ impl Default for OsString {
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Debug for OsString {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
impl fmt::Debug for OsString {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&**self, formatter)
}
}
Expand Down Expand Up @@ -669,9 +669,15 @@ impl Hash for OsStr {
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Debug for OsStr {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.inner.fmt(formatter)
impl fmt::Debug for OsStr {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.inner, formatter)
}
}

impl OsStr {
pub(crate) fn display(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.inner, formatter)
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/libstd/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2281,8 +2281,8 @@ impl AsRef<OsStr> for Path {

#[stable(feature = "rust1", since = "1.0.0")]
impl fmt::Debug for Path {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.inner.fmt(formatter)
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.inner, formatter)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this just general cleanup, or was this required for some reason?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've made several similar changes for these reasons:

  • it is easier to visually check that correct trait is called (Debug or Display)
  • to make sure code on Windows and Redox (e. g. os_str::Slice) works correctly. I do not have access to Windows machine, so it's easier to write code uniformly (single fmt import and explicit trait names) than wait for build system job completion
  • and after all to unify all implementations in stack os_str, OsStr, Path

}
}

Expand Down Expand Up @@ -2314,14 +2314,14 @@ pub struct Display<'a> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> fmt::Debug for Display<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.path.to_string_lossy(), f)
fmt::Debug::fmt(&self.path, f)
}
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> fmt::Display for Display<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.path.to_string_lossy(), f)
self.path.inner.display(f)
}
}

Expand Down
27 changes: 20 additions & 7 deletions src/libstd/sys/redox/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
/// a `Vec<u8>`/`[u8]`.

use borrow::Cow;
use fmt::{self, Debug};
use fmt;
use str;
use mem;
use sys_common::{AsInner, IntoInner};
use std_unicode::lossy::Utf8Lossy;

#[derive(Clone, Hash)]
pub struct Buf {
Expand All @@ -26,15 +27,27 @@ pub struct Slice {
pub inner: [u8]
}

impl Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.to_string_lossy().fmt(formatter)
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.as_slice().fmt(formatter)
impl fmt::Display for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

Expand Down
27 changes: 20 additions & 7 deletions src/libstd/sys/unix/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
/// a `Vec<u8>`/`[u8]`.

use borrow::Cow;
use fmt::{self, Debug};
use fmt;
use str;
use mem;
use sys_common::{AsInner, IntoInner};
use std_unicode::lossy::Utf8Lossy;

#[derive(Clone, Hash)]
pub struct Buf {
Expand All @@ -26,15 +27,27 @@ pub struct Slice {
pub inner: [u8]
}

impl Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.to_string_lossy().fmt(formatter)
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.as_slice().fmt(formatter)
impl fmt::Display for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

Expand Down
26 changes: 19 additions & 7 deletions src/libstd/sys/windows/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
/// wrapper around the "WTF-8" encoding; see the `wtf8` module for more.

use borrow::Cow;
use fmt::{self, Debug};
use fmt;
use sys_common::wtf8::{Wtf8, Wtf8Buf};
use mem;
use sys_common::{AsInner, IntoInner};
Expand All @@ -34,19 +34,31 @@ impl AsInner<Wtf8> for Buf {
}
}

impl Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.as_slice().fmt(formatter)
impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

pub struct Slice {
pub inner: Wtf8
}

impl Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.inner.fmt(formatter)
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.inner, formatter)
}
}

impl fmt::Display for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.inner, formatter)
}
}

Expand Down
Loading