From fdd843662d976f64ce189dc517032b6d74ea95fe Mon Sep 17 00:00:00 2001 From: tormol Date: Tue, 19 Apr 2016 23:36:07 +0200 Subject: [PATCH 1/4] Fix warning on nightly feature(ascii) has been stabilized on nightly. In six weeks we can remove "unstable". --- src/lib.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index de76f4c..83c5c05 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,8 +12,6 @@ //! Operations on ASCII strings and characters -#![cfg_attr(feature = "unstable", feature(ascii))] - mod ascii; mod ascii_string; mod ascii_str; From 97f422527ca890ffd5f92887e57387ea0ee80ce9 Mon Sep 17 00:00:00 2001 From: tormol Date: Sun, 8 May 2016 21:00:00 +0200 Subject: [PATCH 2/4] Add IntoAscii to replace AsciiCast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `AsciiCast` requires an explicit lifetime, which doesn't make sense for `Target=Ascii`: `fn by_value<'a,C:AsciiCast<'a,Target=Ascii>>(ch: C) {…}` doesn't compile, (because `'a` doesn't appear anywhere) so you have to borrow ch: `fn by_ref<'a,C:AsciiCast<'a,Target=Ascii>>(c: &'a C) {…}`. But then you have to explicitly borrow it in the caller: `by_ref(& 'a');` which looks bad. The names are also more in line with what std uses: xxx_unchecked() and Into/From. --- src/ascii.rs | 96 +++++++++++++++++++++++++++++++++++++++++++++++----- src/lib.rs | 2 +- 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/src/ascii.rs b/src/ascii.rs index 76e7f6f..a443163 100644 --- a/src/ascii.rs +++ b/src/ascii.rs @@ -1,6 +1,6 @@ use std::mem::transmute; use std::fmt; -#[cfg(feature="unstable")] +use std::error::Error; use std::ascii::AsciiExt; use AsciiCast; @@ -279,7 +279,7 @@ pub enum Ascii { } impl Ascii { - /// Constructs an Ascii character from a `char`. + /// Constructs an ASCII character from a `u8`, `char` or other character type. 
/// /// # Failure /// @@ -292,11 +292,13 @@ impl Ascii { /// assert_eq!(a.as_char(), 'g'); /// ``` #[inline] - pub fn from(ch: char) -> Result { - unsafe{if ch as u32 <= 0x7F { - return Ok(ch.to_ascii_nocheck()); - }} - Err(()) + pub fn from(ch: C) -> Result { + ch.into_ascii().map_err(|_| () ) + } + + /// Constructs an ASCII character from a `char` or `u8` without any checks. + pub unsafe fn from_unchecked(ch: C) -> Self { + ch.into_ascii_unchecked() } /// Constructs an Ascii character from a `u8`. @@ -530,10 +532,79 @@ impl<'a> AsciiCast<'a> for char { } } + +/// Error returned by `IntoAscii`. +#[derive(PartialEq)] +pub struct IntoAsciiError(()); + +const ERRORMSG_CHAR: &'static str = "not an ASCII character"; + +impl fmt::Debug for IntoAsciiError { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + write!(fmtr, "{}", ERRORMSG_CHAR) + } +} + +impl fmt::Display for IntoAsciiError { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + write!(fmtr, "{}", ERRORMSG_CHAR) + } +} + +impl Error for IntoAsciiError { + fn description(&self) -> &'static str { + ERRORMSG_CHAR + } +} + + +/// Convert `char`, `u8` and other character types to `Ascii`. +pub trait IntoAscii : AsciiExt { + /// Convert to `Ascii` without checking that it is an ASCII character. + unsafe fn into_ascii_unchecked(self) -> Ascii; + /// Convert to `Ascii`. 
+ fn into_ascii(self) -> Result; +} + +#[cfg(feature = "unstable")] +impl IntoAscii for Ascii { + fn into_ascii(self) -> Result { + Ok(self) + } + unsafe fn into_ascii_unchecked(self) -> Ascii { + self + } +} + +impl IntoAscii for u8 { + fn into_ascii(self) -> Result { + unsafe{if self <= 0x7F { + return Ok(self.into_ascii_unchecked()); + }} + Err(IntoAsciiError(())) + } + unsafe fn into_ascii_unchecked(self) -> Ascii { + transmute(self) + } +} + +impl IntoAscii for char { + fn into_ascii(self) -> Result { + unsafe{if self as u32 <= 0x7F { + return Ok(self.into_ascii_unchecked()); + }} + Err(IntoAsciiError(())) + } + unsafe fn into_ascii_unchecked(self) -> Ascii { + (self as u8).into_ascii_unchecked() + } +} + + #[cfg(test)] mod tests { use AsciiCast; - use super::Ascii; + use super::{Ascii,IntoAscii,IntoAsciiError}; #[test] fn to_ascii() { @@ -544,6 +615,15 @@ mod tests { assert_eq!('λ'.to_ascii(), Err(())); } + #[test] + fn into_ascii() { + fn generic(c: C) -> Result { + c.into_ascii() + } + assert_eq!(generic('A'), Ok(Ascii::A)); + assert_eq!(generic(b'A'), Ok(Ascii::A)); + } + #[test] fn as_byte() { assert_eq!(65u8.to_ascii().unwrap().as_byte(), 65u8); diff --git a/src/lib.rs b/src/lib.rs index 83c5c05..e3325e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ mod ascii_str; use std::borrow::Borrow; use std::ascii::AsciiExt; -pub use ascii::Ascii; +pub use ascii::{Ascii, IntoAscii, IntoAsciiError}; pub use ascii_string::AsciiString; pub use ascii_str::AsciiStr; From ee1e38cbf2073b52ae7c4b666cbb42722ebe2ef3 Mon Sep 17 00:00:00 2001 From: tormol Date: Sun, 8 May 2016 22:00:00 +0200 Subject: [PATCH 3/4] Add IntoAsciiString to replace OwnedAsciiCast The only differences are trait and method names. 
--- src/ascii_string.rs | 38 ++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/ascii_string.rs b/src/ascii_string.rs index 22ae34a..a257a46 100644 --- a/src/ascii_string.rs +++ b/src/ascii_string.rs @@ -551,6 +551,44 @@ impl IndexMut for AsciiString where AsciiStr: IndexMut { } } + +/// Convert vectors into `AsciiString`. +pub trait IntoAsciiString> : Sized+Borrow { + /// Convert to `AsciiString` without checking for non-ASCII characters. + unsafe fn into_ascii_unchecked(self) -> AsciiString; + /// Convert to `AsciiString`. + fn into_ascii(self) -> Result { + if self.borrow().is_ascii() { + Ok(unsafe { self.into_ascii_unchecked() }) + } else { + Err(self) + } + } +} + +#[cfg(feature = "unstable")] +impl IntoAsciiString for AsciiString { + fn into_ascii(self) -> Result { + Ok(self) + } + unsafe fn into_ascii_unchecked(self) -> AsciiString { + self + } +} + +impl IntoAsciiString<[u8]> for Vec { + unsafe fn into_ascii_unchecked(self) -> AsciiString { + AsciiString::from_bytes_unchecked(self) + } +} + +impl IntoAsciiString for String { + unsafe fn into_ascii_unchecked(self) -> AsciiString { + self.into_bytes().into_ascii_unchecked() + } +} + + #[cfg(test)] mod tests { use std::str::FromStr; diff --git a/src/lib.rs b/src/lib.rs index e3325e7..e0751e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,7 @@ use std::borrow::Borrow; use std::ascii::AsciiExt; pub use ascii::{Ascii, IntoAscii, IntoAsciiError}; -pub use ascii_string::AsciiString; +pub use ascii_string::{AsciiString, IntoAsciiString}; pub use ascii_str::AsciiStr; /// Trait for converting into an ascii type. From a6c80f71cebd9d721fa5414e0f965c46da5bb20f Mon Sep 17 00:00:00 2001 From: tormol Date: Sun, 8 May 2016 21:00:00 +0200 Subject: [PATCH 4/4] Add As(Mut)AsciiStr to replace AsciiCast The error type says where and why conversion failed. Two separate traits to follow std convention. 
Stops testing on 1.1.0 because the tests don't compile there: If we trust Rust to not introduce silent breaking changes, code that passes tests on stable should be correct on older versions as long as it compiles. --- .travis.yml | 2 +- src/ascii_str.rs | 238 +++++++++++++++++++++++++++++++++++++++++++++-- src/lib.rs | 2 +- 3 files changed, 231 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 601982a..94d7526 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,7 @@ before_script: script: - | travis-cargo build && - travis-cargo test && + travis-cargo --skip 1.1.0 test && travis-cargo --only stable doc after_success: diff --git a/src/ascii_str.rs b/src/ascii_str.rs index 56677e7..cb3e5b3 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -1,5 +1,6 @@ use std::{fmt, mem}; use std::ops::{Index, IndexMut, Range, RangeTo, RangeFrom, RangeFull}; +use std::error::Error; use std::ascii::AsciiExt; use AsciiCast; @@ -77,13 +78,27 @@ impl AsciiStr { { unsafe { if bytes.as_ref().is_ascii() { - Ok( mem::transmute(bytes.as_ref()) ) + Ok( Self::from_bytes_unchecked(bytes) ) } else { Err(()) } } } + /// Converts anything that can be represented as a byte slice to an `AsciiStr` without checking for non-ASCII characters. + /// + /// # Examples + /// ``` + /// # use ascii::AsciiStr; + /// let foo = unsafe{ AsciiStr::from_bytes_unchecked("foo") }; + /// assert_eq!(foo.as_str(), "foo"); + /// ``` + pub unsafe fn from_bytes_unchecked<'a, B: ?Sized>(bytes: &'a B) -> &'a AsciiStr + where B: AsRef<[u8]> + { + mem::transmute(bytes.as_ref()) + } + /// Converts a borrowed string to a borrows ascii string.
pub fn from_str<'a>(s: &'a str) -> Result<&'a AsciiStr, ()> { AsciiStr::from_bytes(s.as_bytes()) @@ -178,12 +193,6 @@ impl PartialOrd for AsciiStr { } */ -impl Default for &'static AsciiStr { - fn default() -> &'static AsciiStr { - unsafe { mem::transmute("") } - } -} - impl ToOwned for AsciiStr { type Owned = AsciiString; @@ -213,6 +222,11 @@ impl AsMut<[Ascii]> for AsciiStr { } } +impl Default for &'static AsciiStr { + fn default() -> &'static AsciiStr { + unsafe{ "".as_ascii_unchecked() } + } +} impl<'a> From<&'a[Ascii]> for &'a AsciiStr { fn from(slice: &[Ascii]) -> &AsciiStr { unsafe{ mem::transmute(slice) } @@ -342,10 +356,216 @@ impl<'a> AsciiCast<'a> for str { } } + +/// Error returned by `AsAsciiStr` +#[derive(Clone,Copy)] +pub struct AsAsciiStrError { + index: usize, + /// If less than 128, it was a byte >= 128 and not from a str + not_ascii: char, +} + +impl AsAsciiStrError { + /// Get the index of the first non-ASCII byte or character. + pub fn index(self) -> usize { + self.index + } + + /// Get the non-ASCII byte that caused the conversion to fail. + /// + /// If it was a `str` that was being converted, the first byte in the utf8 encoding is returned. + pub fn byte(self) -> u8 { + if (self.not_ascii as u32) < 128 { + self.not_ascii as u8 + 128 + } else { + // FIXME: use char::encode_utf8() when stabilized. + let mut s = String::with_capacity(4); + s.push(self.not_ascii); + s.bytes().next().unwrap() + } + } + + /// Get the character that caused the conversion from a `str` to fail. 
+ /// + /// Returns `None` if the error was caused by a byte in a `[u8]` + pub fn char(self) -> Option { + match self.not_ascii as u32 { + 0...127 => None, // byte in a [u8] + _ => Some(self.not_ascii), + } + } +} + +impl fmt::Debug for AsAsciiStrError { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + if (self.not_ascii as u32) < 128 { + write!(fmtr, "b'\\x{:x}' at index {}", self.not_ascii as u8 + 128, self.index) + } else { + write!(fmtr, "'{}' at index {}", self.not_ascii, self.index) + } + } +} + +impl fmt::Display for AsAsciiStrError { + fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result { + if (self.not_ascii as u32) < 128 { + write!(fmtr, "the byte \\x{:x} at index {} is not ASCII", self.not_ascii as u8 + 128, self.index) + } else { + write!(fmtr, "the character {} at index {} is not ASCII", self.not_ascii, self.index) + } + } +} + +impl Error for AsAsciiStrError { + fn description(&self) -> &'static str { + if (self.not_ascii as u32) < 128 { + "one or more bytes are not ASCII" + } else { + "one or more characters are not ASCII" + } + } +} + + +/// Convert slices of bytes to `AsciiStr`. +pub trait AsAsciiStr : AsciiExt { + /// Convert to an ASCII slice without checking for non-ASCII characters. + unsafe fn as_ascii_unchecked(&self) -> &AsciiStr; + /// Convert to an ASCII slice. + fn as_ascii(&self) -> Result<&AsciiStr,AsAsciiStrError>; +} +/// Convert mutable slices of bytes to `AsciiStr`. +pub trait AsMutAsciiStr : AsciiExt { + /// Convert to a mutable ASCII slice without checking for non-ASCII characters. + unsafe fn as_mut_ascii_unchecked(&mut self) -> &mut AsciiStr; + /// Convert to a mutable ASCII slice.
+ fn as_mut_ascii(&mut self) -> Result<&mut AsciiStr,AsAsciiStrError>; +} + +#[cfg(feature = "unstable")] +impl AsAsciiStr for AsciiStr { + fn as_ascii(&self) -> Result<&AsciiStr,AsAsciiStrError> { + Ok(self) + } + unsafe fn as_ascii_unchecked(&self) -> &AsciiStr { + self + } +} +#[cfg(feature = "unstable")] +impl AsMutAsciiStr for AsciiStr { + fn as_mut_ascii(&mut self) -> Result<&mut AsciiStr,AsAsciiStrError> { + Ok(self) + } + unsafe fn as_mut_ascii_unchecked(&mut self) -> &mut AsciiStr { + self + } +} + +// Cannot implement for [Ascii] since AsciiExt isn't implemented for it. + +impl AsAsciiStr for [u8] { + fn as_ascii(&self) -> Result<&AsciiStr,AsAsciiStrError> { + match self.iter().enumerate().find(|&(_,b)| *b > 127 ) { + Some((index, &byte)) => Err(AsAsciiStrError{ + index: index, + not_ascii: (byte - 128) as char, + }), + None => unsafe{ Ok(self.as_ascii_unchecked()) }, + } + } + unsafe fn as_ascii_unchecked(&self) -> &AsciiStr { + AsciiStr::from_bytes_unchecked(self) + } +} +impl AsMutAsciiStr for [u8] { + fn as_mut_ascii(&mut self) -> Result<&mut AsciiStr,AsAsciiStrError> { + match self.iter().enumerate().find(|&(_,b)| *b > 127 ) { + Some((index, &byte)) => Err(AsAsciiStrError{ + index: index, + not_ascii: (byte - 128) as char, + }), + None => unsafe{ Ok(self.as_mut_ascii_unchecked()) }, + } + } + unsafe fn as_mut_ascii_unchecked(&mut self) -> &mut AsciiStr { + mem::transmute(self) + } +} + +impl AsAsciiStr for str { + fn as_ascii(&self) -> Result<&AsciiStr,AsAsciiStrError> { + self.as_bytes().as_ascii().map_err(|err| AsAsciiStrError{ + not_ascii: self[err.index..].chars().next().unwrap(), + index: err.index, + }) + } + unsafe fn as_ascii_unchecked(&self) -> &AsciiStr { + mem::transmute(self) + } +} +impl AsMutAsciiStr for str { + fn as_mut_ascii(&mut self) -> Result<&mut AsciiStr,AsAsciiStrError> { + match self.bytes().position(|b| b > 127 ) { + Some(index) => Err(AsAsciiStrError{ + index: index, + not_ascii: self[index..].chars().next().unwrap(), + }),
+ None => unsafe{ Ok(self.as_mut_ascii_unchecked()) }, + } + } + unsafe fn as_mut_ascii_unchecked(&mut self) -> &mut AsciiStr { + mem::transmute(self) + } +} + + #[cfg(test)] mod tests { - use AsciiCast; - use super::AsciiStr; + use {AsciiCast,Ascii}; + use super::{AsciiStr,AsAsciiStr,AsMutAsciiStr,AsAsciiStrError}; + + /// Make Result<_,AsAsciiError> comparable. + pub fn tuplify(r: Result) -> Result { + r.map_err(|e| (e.index, e.not_ascii) ) + } + + #[test] + fn generic_as_ascii() { + fn generic(c: &C) -> Result<&AsciiStr,AsAsciiStrError> { + c.as_ascii() + } + let arr = [Ascii::A]; + let ascii_str = arr.as_ref().into(); + assert_eq!(tuplify(generic("A")), Ok(ascii_str)); + assert_eq!(tuplify(generic(&b"A"[..])), Ok(ascii_str)); + //assert_eq!(generic(ascii_str), Ok(ascii_str)); + } + + #[test] + fn as_ascii() { + let mut s: String = "abčd".to_string(); + let mut b: Vec = s.clone().into(); + assert_eq!(tuplify(s.as_str().as_ascii()), Err((2,'č'))); + assert_eq!(tuplify(s.as_mut_str().as_mut_ascii()), Err((2,'č'))); + let c = (b[2]-128) as char; + assert_eq!(tuplify(b.as_slice().as_ascii()), Err((2,c))); + assert_eq!(tuplify(b.as_mut_slice().as_mut_ascii()), Err((2,c))); + let mut a = [Ascii::a, Ascii::b]; + assert_eq!(tuplify((&s[..2]).as_ascii()), Ok((&a[..]).into())); + assert_eq!(tuplify((&b[..2]).as_ascii()), Ok((&a[..]).into())); + let a = Ok((&mut a[..]).into()); + assert_eq!(tuplify((&mut s[..2]).as_mut_ascii()), a); + assert_eq!(tuplify((&mut b[..2]).as_mut_ascii()), a); + } + + #[test] + fn as_ascii_error() { + let s = "abčd".as_ascii().unwrap_err(); + let b = "abčd".as_bytes().as_ascii().unwrap_err(); + assert_eq!(s.char(), Some('č')); + assert_eq!(b.char(), None); + assert_eq!(s.byte(), b.byte()); + } #[test] fn default() { diff --git a/src/lib.rs b/src/lib.rs index e0751e4..8ed7c8d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ use std::ascii::AsciiExt; pub use ascii::{Ascii, IntoAscii, IntoAsciiError}; pub use ascii_string::{AsciiString, 
IntoAsciiString}; -pub use ascii_str::AsciiStr; +pub use ascii_str::{AsciiStr, AsAsciiStr, AsMutAsciiStr, AsAsciiStrError}; /// Trait for converting into an ascii type. pub trait AsciiCast<'a>: AsciiExt {