Skip to content

Commit 1114b89

Browse files
committed
Add some convenience methods to go from CStr -> str
A common problem when working with FFI right now is converting from raw C strings into `&str` or `String`. Right now you're required to say something like `let cstr = unsafe { CStr::from_ptr(ptr) }; let result = str::from_utf8(cstr.to_bytes());`. This is slightly awkward, and is not particularly intuitive for people who haven't used the ffi module before. We can do a bit better by providing some convenience methods on CStr: `fn to_str(&self) -> Result<&str, str::Utf8Error>`, `unsafe fn to_str_unchecked(&self) -> &str`, and `fn to_string_lossy(&self) -> Cow<str>`. This will make it immediately apparent to new users of CStr how to get a string from a raw C string, so they can say: `let s = unsafe { CStr::from_ptr(ptr).to_string_lossy() };`
1 parent dd4dad8 commit 1114b89

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

src/libstd/ffi/c_str.rs

+87
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
#![unstable(feature = "std_misc")]
1212

13+
use borrow::Cow;
1314
use convert::{Into, From};
1415
use cmp::{PartialEq, Eq, PartialOrd, Ord, Ordering};
1516
use error::Error;
@@ -22,6 +23,7 @@ use ops::Deref;
2223
use option::Option::{self, Some, None};
2324
use result::Result::{self, Ok, Err};
2425
use slice;
26+
use str;
2527
use string::String;
2628
use vec::Vec;
2729

@@ -114,6 +116,26 @@ pub struct CString {
114116
/// work(&s);
115117
/// }
116118
/// ```
119+
///
120+
/// Converting a foreign C string into a Rust `String`
121+
///
122+
/// ```no_run
123+
/// # #![feature(libc)]
124+
/// extern crate libc;
125+
/// use std::ffi::CStr;
126+
///
127+
/// extern { fn my_string() -> *const libc::c_char; }
128+
///
129+
/// fn my_string_safe() -> String {
130+
/// unsafe {
131+
/// CStr::from_ptr(my_string()).to_string_lossy().into_owned()
132+
/// }
133+
/// }
134+
///
135+
/// fn main() {
136+
/// println!("string: {}", my_string_safe());
137+
/// }
138+
/// ```
117139
#[derive(Hash)]
118140
#[stable(feature = "rust1", since = "1.0.0")]
119141
pub struct CStr {
@@ -328,6 +350,53 @@ impl CStr {
328350
pub fn to_bytes_with_nul(&self) -> &[u8] {
329351
unsafe { mem::transmute::<&[libc::c_char], &[u8]>(&self.inner) }
330352
}
353+
354+
/// Yields a `&str` slice if the `CStr` contains valid UTF-8.
355+
///
356+
/// This function will calculate the length of this string and check for
357+
/// UTF-8 validity, and then return the `&str` if it's valid.
358+
///
359+
/// > **Note**: This method is currently implemented to check for validity
360+
/// > after a 0-cost cast, but it is planned to alter its definition in the
361+
/// > future to perform the length calculation in addition to the UTF-8
362+
/// > check whenever this method is called.
363+
#[unstable(feature = "cstr", reason = "recently added")]
364+
pub fn to_str(&self) -> Result<&str, str::Utf8Error> {
365+
// NB: When CStr is changed to perform the length check in .to_bytes() instead of in
366+
// from_ptr(), it may be worth considering if this should be rewritten to do the UTF-8
367+
// check inline with the length calculation instead of doing it afterwards.
368+
str::from_utf8(self.to_bytes())
369+
}
370+
371+
/// Yields a `&str` slice without checking if the `CStr` contains valid UTF-8.
372+
///
373+
/// This function will calculate the length of this string (which normally
374+
/// requires a linear amount of work to be done) and then return the
375+
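/// resulting slice as a `&str` without checking for UTF-8 validity. The caller must ensure the bytes are valid UTF-8, because a `&str` is assumed to always hold valid UTF-8.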
376+
///
377+
/// > **Note:** This method is currently implemented as a 0-cost cast, but
378+
/// > it is planned to alter its definition in the future to perform the
379+
/// > length calculation whenever this method is called.
380+
#[unstable(feature = "cstr", reason = "recently added")]
381+
pub unsafe fn to_str_unchecked(&self) -> &str {
382+
str::from_utf8_unchecked(self.to_bytes())
383+
}
384+
385+
/// Converts a `CStr` into a `Cow<str>`.
386+
///
387+
/// This function will calculate the length of this string (which normally
388+
/// requires a linear amount of work to be done) and then return the
389+
/// resulting slice as a `Cow<str>`, replacing any invalid UTF-8 sequences
390+
/// with `U+FFFD REPLACEMENT CHARACTER`.
391+
///
392+
/// > **Note**: This method is currently implemented to check for validity
393+
/// > after a 0-cost cast, but it is planned to alter its definition in the
394+
/// > future to perform the length calculation in addition to the UTF-8
395+
/// > check whenever this method is called.
396+
#[unstable(feature = "cstr", reason = "recently added")]
397+
pub fn to_string_lossy(&self) -> Cow<str> {
398+
String::from_utf8_lossy(self.to_bytes())
399+
}
331400
}
332401

333402
#[stable(feature = "rust1", since = "1.0.0")]
@@ -356,6 +425,7 @@ mod tests {
356425
use prelude::v1::*;
357426
use super::*;
358427
use libc;
428+
use borrow::Cow::{Borrowed, Owned};
359429

360430
#[test]
361431
fn c_to_rust() {
@@ -405,4 +475,21 @@ mod tests {
405475
assert_eq!(s.to_bytes_with_nul(), b"12\0");
406476
}
407477
}
478+
479+
#[test]
480+
fn to_str() {
481+
let data = b"123\xE2\x80\xA6\0";
482+
let ptr = data.as_ptr() as *const libc::c_char;
483+
unsafe {
484+
assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…"));
485+
assert_eq!(CStr::from_ptr(ptr).to_str_unchecked(), "123…");
486+
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…"));
487+
}
488+
let data = b"123\xE2\0";
489+
let ptr = data.as_ptr() as *const libc::c_char;
490+
unsafe {
491+
assert!(CStr::from_ptr(ptr).to_str().is_err());
492+
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}")));
493+
}
494+
}
408495
}

0 commit comments

Comments
 (0)