Skip to content

Commit

Permalink
Auto merge of #42613 - stepancheg:lossy, r=alexcrichton
Browse files Browse the repository at this point in the history
Avoid allocations in Display for OsStr and Path

#38879
  • Loading branch information
bors committed Jun 17, 2017
2 parents 1169a1f + ac96fd7 commit 3438c0f
Show file tree
Hide file tree
Showing 12 changed files with 450 additions and 134 deletions.
121 changes: 22 additions & 99 deletions src/liballoc/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ use core::hash;
use core::iter::{FromIterator, FusedIterator};
use core::ops::{self, Add, AddAssign, Index, IndexMut};
use core::ptr;
use core::str as core_str;
use core::str::pattern::Pattern;
use std_unicode::lossy;
use std_unicode::char::{decode_utf16, REPLACEMENT_CHARACTER};

use borrow::{Cow, ToOwned};
Expand Down Expand Up @@ -533,111 +533,34 @@ impl String {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
let mut i;
match str::from_utf8(v) {
Ok(s) => return Cow::Borrowed(s),
Err(e) => i = e.valid_up_to(),
}
let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks();

const TAG_CONT_U8: u8 = 128;
const REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
let total = v.len();
fn unsafe_get(xs: &[u8], i: usize) -> u8 {
unsafe { *xs.get_unchecked(i) }
}
fn safe_get(xs: &[u8], i: usize, total: usize) -> u8 {
if i >= total { 0 } else { unsafe_get(xs, i) }
}
let (first_valid, first_broken) = if let Some(chunk) = iter.next() {
let lossy::Utf8LossyChunk { valid, broken } = chunk;
if valid.len() == v.len() {
debug_assert!(broken.is_empty());
return Cow::Borrowed(valid);
}
(valid, broken)
} else {
return Cow::Borrowed("");
};

let mut res = String::with_capacity(total);
const REPLACEMENT: &'static str = "\u{FFFD}";

if i > 0 {
unsafe { res.as_mut_vec().extend_from_slice(&v[..i]) };
let mut res = String::with_capacity(v.len());
res.push_str(first_valid);
if !first_broken.is_empty() {
res.push_str(REPLACEMENT);
}

// subseqidx is the index of the first byte of the subsequence we're
// looking at. It's used to copy a bunch of contiguous good codepoints
// at once instead of copying them one by one.
let mut subseqidx = i;

while i < total {
let i_ = i;
let byte = unsafe_get(v, i);
i += 1;

macro_rules! error { () => ({
unsafe {
if subseqidx != i_ {
res.as_mut_vec().extend_from_slice(&v[subseqidx..i_]);
}
subseqidx = i;
res.as_mut_vec().extend_from_slice(REPLACEMENT);
}
})}

if byte < 128 {
// subseqidx handles this
} else {
let w = core_str::utf8_char_width(byte);

match w {
2 => {
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
}
3 => {
match (byte, safe_get(v, i, total)) {
(0xE0, 0xA0...0xBF) => (),
(0xE1...0xEC, 0x80...0xBF) => (),
(0xED, 0x80...0x9F) => (),
(0xEE...0xEF, 0x80...0xBF) => (),
_ => {
error!();
continue;
}
}
i += 1;
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
}
4 => {
match (byte, safe_get(v, i, total)) {
(0xF0, 0x90...0xBF) => (),
(0xF1...0xF3, 0x80...0xBF) => (),
(0xF4, 0x80...0x8F) => (),
_ => {
error!();
continue;
}
}
i += 1;
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
if safe_get(v, i, total) & 192 != TAG_CONT_U8 {
error!();
continue;
}
i += 1;
}
_ => {
error!();
continue;
}
}
for lossy::Utf8LossyChunk { valid, broken } in iter {
res.push_str(valid);
if !broken.is_empty() {
res.push_str(REPLACEMENT);
}
}
if subseqidx < total {
unsafe { res.as_mut_vec().extend_from_slice(&v[subseqidx..total]) };
}

Cow::Owned(res)
}

Expand Down
18 changes: 12 additions & 6 deletions src/libstd/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// except according to those terms.

use borrow::{Borrow, Cow};
use fmt::{self, Debug};
use fmt;
use mem;
use ops;
use cmp;
Expand Down Expand Up @@ -312,8 +312,8 @@ impl Default for OsString {
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Debug for OsString {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
impl fmt::Debug for OsString {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&**self, formatter)
}
}
Expand Down Expand Up @@ -669,9 +669,15 @@ impl Hash for OsStr {
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Debug for OsStr {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.inner.fmt(formatter)
impl fmt::Debug for OsStr {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.inner, formatter)
}
}

impl OsStr {
pub(crate) fn display(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.inner, formatter)
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/libstd/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2281,8 +2281,8 @@ impl AsRef<OsStr> for Path {

#[stable(feature = "rust1", since = "1.0.0")]
impl fmt::Debug for Path {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.inner.fmt(formatter)
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.inner, formatter)
}
}

Expand Down Expand Up @@ -2314,14 +2314,14 @@ pub struct Display<'a> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> fmt::Debug for Display<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.path.to_string_lossy(), f)
fmt::Debug::fmt(&self.path, f)
}
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> fmt::Display for Display<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.path.to_string_lossy(), f)
self.path.inner.display(f)
}
}

Expand Down
27 changes: 20 additions & 7 deletions src/libstd/sys/redox/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
/// a `Vec<u8>`/`[u8]`.
use borrow::Cow;
use fmt::{self, Debug};
use fmt;
use str;
use mem;
use sys_common::{AsInner, IntoInner};
use std_unicode::lossy::Utf8Lossy;

#[derive(Clone, Hash)]
pub struct Buf {
Expand All @@ -26,15 +27,27 @@ pub struct Slice {
pub inner: [u8]
}

impl Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.to_string_lossy().fmt(formatter)
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.as_slice().fmt(formatter)
impl fmt::Display for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

Expand Down
27 changes: 20 additions & 7 deletions src/libstd/sys/unix/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
/// a `Vec<u8>`/`[u8]`.
use borrow::Cow;
use fmt::{self, Debug};
use fmt;
use str;
use mem;
use sys_common::{AsInner, IntoInner};
use std_unicode::lossy::Utf8Lossy;

#[derive(Clone, Hash)]
pub struct Buf {
Expand All @@ -26,15 +27,27 @@ pub struct Slice {
pub inner: [u8]
}

impl Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.to_string_lossy().fmt(formatter)
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.as_slice().fmt(formatter)
impl fmt::Display for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&Utf8Lossy::from_bytes(&self.inner), formatter)
}
}

impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

Expand Down
26 changes: 19 additions & 7 deletions src/libstd/sys/windows/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
/// wrapper around the "WTF-8" encoding; see the `wtf8` module for more.
use borrow::Cow;
use fmt::{self, Debug};
use fmt;
use sys_common::wtf8::{Wtf8, Wtf8Buf};
use mem;
use sys_common::{AsInner, IntoInner};
Expand All @@ -34,19 +34,31 @@ impl AsInner<Wtf8> for Buf {
}
}

impl Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.as_slice().fmt(formatter)
impl fmt::Debug for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self.as_slice(), formatter)
}
}

impl fmt::Display for Buf {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(self.as_slice(), formatter)
}
}

pub struct Slice {
pub inner: Wtf8
}

impl Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
self.inner.fmt(formatter)
impl fmt::Debug for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.inner, formatter)
}
}

impl fmt::Display for Slice {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.inner, formatter)
}
}

Expand Down
Loading

0 comments on commit 3438c0f

Please sign in to comment.