Skip to content

Commit

Permalink
Auto merge of #1564 - Aaron1011:readlink, r=RalfJung
Browse files Browse the repository at this point in the history
Implement `readlink`

Due to the truncating behavior of `readlink`, I was not able to
directly use any of the existing C-cstring helper functions.
  • Loading branch information
bors committed Oct 4, 2020
2 parents 9202f7d + 3aaab3d commit 60c1075
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 91 deletions.
143 changes: 74 additions & 69 deletions src/shims/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,48 @@ use rustc_target::abi::LayoutOf;
use crate::*;

/// Represent how path separator conversion should be done.
enum Pathconversion {
pub enum PathConversion {
HostToTarget,
TargetToHost,
}

/// Perform path separator conversion if needed.
fn convert_path_separator<'a>(
os_str: Cow<'a, OsStr>,
target_os: &str,
direction: Pathconversion,
) -> Cow<'a, OsStr> {
#[cfg(windows)]
return if target_os == "windows" {
// Windows-on-Windows, all fine.
os_str
} else {
// Unix target, Windows host.
let (from, to) = match direction {
Pathconversion::HostToTarget => ('\\', '/'),
Pathconversion::TargetToHost => ('/', '\\'),
};
let converted = os_str
.encode_wide()
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
.collect::<Vec<_>>();
Cow::Owned(OsString::from_wide(&converted))
};
#[cfg(unix)]
return if target_os == "windows" {
// Windows target, Unix host.
let (from, to) = match direction {
Pathconversion::HostToTarget => ('/', '\\'),
Pathconversion::TargetToHost => ('\\', '/'),
};
let converted = os_str
.as_bytes()
.iter()
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
.collect::<Vec<_>>();
Cow::Owned(OsString::from_vec(converted))
} else {
// Unix-on-Unix, all is fine.
os_str
};
/// Views an OS string as its underlying byte sequence.
///
/// On Unix hosts this is a direct, infallible reinterpretation of the string's bytes.
#[cfg(unix)]
pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
    let raw: &'a [u8] = os_str.as_bytes();
    Ok(raw)
}

/// Views an OS string as a byte sequence on hosts that are not Unix.
///
/// Such hosts give no direct byte access, so the string is routed through `str`. An OS string
/// that is not valid UTF-8 is therefore rejected with an "unsupported" error, even though it
/// may be a perfectly valid path on the host.
#[cfg(not(unix))]
pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
    match os_str.to_str() {
        Some(utf8) => Ok(utf8.as_bytes()),
        None => Err(err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()),
    }
}

/// Reinterprets a byte slice as an OS string.
///
/// On Unix hosts every byte sequence is accepted, so this never fails.
#[cfg(unix)]
pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
    let os_str: &'a OsStr = OsStr::from_bytes(bytes);
    Ok(os_str)
}
/// Reinterprets a byte slice as an OS string on hosts that are not Unix.
///
/// The bytes must be valid UTF-8; anything else is reported as an "unsupported" error.
#[cfg(not(unix))]
pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
    match std::str::from_utf8(bytes) {
        Ok(utf8) => Ok(OsStr::new(utf8)),
        Err(_) => Err(err_unsup_format!("{:?} is not a valid utf-8 string", bytes).into()),
    }
}

impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {

/// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
/// the Unix APIs usually handle.
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
where
'tcx: 'a,
'mir: 'a,
{
#[cfg(unix)]
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
Ok(OsStr::from_bytes(bytes))
}
#[cfg(not(unix))]
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
let s = std::str::from_utf8(bytes)
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
Ok(OsStr::new(s))
}

let this = self.eval_context_ref();
let bytes = this.memory.read_c_str(scalar)?;
bytes_to_os_str(bytes)
Expand Down Expand Up @@ -118,20 +94,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
scalar: Scalar<Tag>,
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
#[cfg(unix)]
fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
Ok(os_str.as_bytes())
}
#[cfg(not(unix))]
fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
// On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
// intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
// valid.
os_str
.to_str()
.map(|s| s.as_bytes())
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
}

let bytes = os_str_to_bytes(os_str)?;
// If `size` is smaller than or equal to `bytes.len()`, writing `bytes` plus the required null
Expand Down Expand Up @@ -226,7 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
let this = self.eval_context_ref();
let os_str = this.read_os_str_from_c_str(scalar)?;

Ok(match convert_path_separator(Cow::Borrowed(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost) {
Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
})
Expand All @@ -237,7 +199,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
let this = self.eval_context_ref();
let os_str = this.read_os_str_from_wide_str(scalar)?;

Ok(convert_path_separator(Cow::Owned(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost).into_owned().into())
Ok(this.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into())
}

/// Write a Path to the machine memory (as a null-terminated sequence of bytes),
Expand All @@ -249,7 +211,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
let this = self.eval_context_mut();
let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.write_os_str_to_c_str(&os_str, scalar, size)
}

Expand All @@ -262,7 +224,50 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
let this = self.eval_context_mut();
let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
this.write_os_str_to_wide_str(&os_str, scalar, size)
}

fn convert_path_separator<'a>(
&self,
os_str: Cow<'a, OsStr>,
direction: PathConversion,
) -> Cow<'a, OsStr> {
let this = self.eval_context_ref();
let target_os = &this.tcx.sess.target.target.target_os;
#[cfg(windows)]
return if target_os == "windows" {
// Windows-on-Windows, all fine.
os_str
} else {
// Unix target, Windows host.
let (from, to) = match direction {
PathConversion::HostToTarget => ('\\', '/'),
PathConversion::TargetToHost => ('/', '\\'),
};
let converted = os_str
.encode_wide()
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
.collect::<Vec<_>>();
Cow::Owned(OsString::from_wide(&converted))
};
#[cfg(unix)]
return if target_os == "windows" {
// Windows target, Unix host.
let (from, to) = match direction {
PathConversion::HostToTarget => ('/', '\\'),
PathConversion::TargetToHost => ('\\', '/'),
};
let converted = os_str
.as_bytes()
.iter()
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
.collect::<Vec<_>>();
Cow::Owned(OsString::from_vec(converted))
} else {
// Unix-on-Unix, all is fine.
os_str
};
}
}

5 changes: 5 additions & 0 deletions src/shims/posix/foreign_items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
let result = this.fdatasync(fd)?;
this.write_scalar(Scalar::from_i32(result), dest)?;
}
"readlink" => {
let &[pathname, buf, bufsize] = check_arg_count(args)?;
let result = this.readlink(pathname, buf, bufsize)?;
this.write_scalar(Scalar::from_machine_isize(result, this), dest)?;
}

// Allocation
"posix_memalign" => {
Expand Down
36 changes: 36 additions & 0 deletions src/shims/posix/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::fs::{read_dir, remove_dir, remove_file, rename, DirBuilder, File, FileT
use std::io::{self, Read, Seek, SeekFrom, Write};
use std::path::Path;
use std::time::SystemTime;
use std::borrow::Cow;

use log::trace;

Expand Down Expand Up @@ -1353,6 +1354,41 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
this.handle_not_found()
}
}

/// Shim for `readlink`: resolves the symbolic link named by `pathname_op` on the host and
/// copies the resulting path into the buffer at `buf_op`.
///
/// At most `bufsize_op` bytes are written and no null terminator is appended. Returns the
/// number of bytes written, or `-1` after recording the host I/O error as the last error.
/// Fails up front if isolation is enabled.
fn readlink(
    &mut self,
    pathname_op: OpTy<'tcx, Tag>,
    buf_op: OpTy<'tcx, Tag>,
    bufsize_op: OpTy<'tcx, Tag>,
) -> InterpResult<'tcx, i64> {
    let this = self.eval_context_mut();

    this.check_no_isolation("readlink")?;

    let pathname = this.read_path_from_c_str(this.read_scalar(pathname_op)?.check_init()?)?;
    let buf = this.read_scalar(buf_op)?.check_init()?;
    let bufsize = this.read_scalar(bufsize_op)?.to_machine_usize(this)?;

    let link_target = match std::fs::read_link(pathname) {
        Ok(link_target) => link_target,
        Err(e) => {
            this.set_last_error_from_io_error(e)?;
            return Ok(-1);
        }
    };

    // The host reports the link target with host separators; the program expects target ones.
    let converted = this.convert_path_separator(
        Cow::Borrowed(link_target.as_os_str()),
        crate::shims::os_str::PathConversion::HostToTarget,
    );
    let all_bytes = crate::shims::os_str::os_str_to_bytes(&converted)?;
    let bufsize: usize = bufsize.try_into().unwrap();

    // 'readlink' truncates the resolved path if
    // the provided buffer is not large enough.
    let written = &all_bytes[..all_bytes.len().min(bufsize)];
    this.memory.write_bytes(buf, written.iter().copied())?;
    Ok(written.len().try_into().unwrap())
}
}

/// Extracts the number of seconds and nanoseconds elapsed between `time` and the unix epoch when
Expand Down
70 changes: 68 additions & 2 deletions tests/run-pass/fs.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
// ignore-windows: File handling is not implemented yet
// compile-flags: -Zmiri-disable-isolation

#![feature(rustc_private)]

use std::fs::{
File, create_dir, OpenOptions, read_dir, remove_dir, remove_dir_all, remove_file, rename,
};
use std::io::{Read, Write, ErrorKind, Result, Seek, SeekFrom};
use std::ffi::CString;
use std::io::{Read, Write, Error, ErrorKind, Result, Seek, SeekFrom};
use std::path::{PathBuf, Path};

extern crate libc;


fn main() {
test_file();
test_file_clone();
Expand All @@ -19,10 +25,23 @@ fn main() {
test_errors();
test_rename();
test_directory();
test_dup_stdout_stderr();
}

/// Returns the directory in which the tests create their scratch files.
///
/// Uses the `MIRI_TEMP` environment variable when it is set, and falls back to
/// `std::env::temp_dir()` otherwise.
fn tmp() -> PathBuf {
    std::env::var("MIRI_TEMP")
        .map(|tmp| {
            // MIRI_TEMP is set outside of our emulated
            // program, so it may have path separators that don't
            // correspond to our target platform. We normalize them here
            // before constructing a `PathBuf`.
            let normalized =
                if cfg!(windows) { tmp.replace("/", "\\") } else { tmp.replace("\\", "/") };
            PathBuf::from(normalized)
        })
        .unwrap_or_else(|_| std::env::temp_dir())
}

/// Prepare: compute filename and make sure the file does not exist.
Expand Down Expand Up @@ -215,6 +234,43 @@ fn test_symlink() {
let mut contents = Vec::new();
symlink_file.read_to_end(&mut contents).unwrap();
assert_eq!(bytes, contents.as_slice());


#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;

let expected_path = path.as_os_str().as_bytes();

// Test that the expected string gets written to a buffer of proper
// length, and that a trailing null byte is not written.
let symlink_c_str = CString::new(symlink_path.as_os_str().as_bytes()).unwrap();
let symlink_c_ptr = symlink_c_str.as_ptr();

// Make the buf one byte larger than it needs to be,
// and check that the last byte is not overwritten.
let mut large_buf = vec![0xFF; expected_path.len() + 1];
let res = unsafe { libc::readlink(symlink_c_ptr, large_buf.as_mut_ptr().cast(), large_buf.len()) };
// Check that the resolved path was properly written into the buf.
assert_eq!(&large_buf[..(large_buf.len() - 1)], expected_path);
assert_eq!(large_buf.last(), Some(&0xFF));
assert_eq!(res, large_buf.len() as isize - 1);

// Test that the resolved path is truncated if the provided buffer
// is too small.
let mut small_buf = [0u8; 2];
let res = unsafe { libc::readlink(symlink_c_ptr, small_buf.as_mut_ptr().cast(), small_buf.len()) };
assert_eq!(small_buf, &expected_path[..small_buf.len()]);
assert_eq!(res, small_buf.len() as isize);

// Test that we report a proper error for a missing path.
let bad_path = CString::new("MIRI_MISSING_FILE_NAME").unwrap();
let res = unsafe { libc::readlink(bad_path.as_ptr(), small_buf.as_mut_ptr().cast(), small_buf.len()) };
assert_eq!(res, -1);
assert_eq!(Error::last_os_error().kind(), ErrorKind::NotFound);
}


// Test that metadata of a symbolic link is correct.
check_metadata(bytes, &symlink_path).unwrap();
// Test that the metadata of a symbolic link is correct when not following it.
Expand Down Expand Up @@ -292,3 +348,13 @@ fn test_directory() {
// Reading the metadata of a non-existent directory should fail with a "not found" error.
assert_eq!(ErrorKind::NotFound, check_metadata(&[], &dir_path).unwrap_err().kind());
}

/// Duplicates the stdout and stderr file descriptors with `fcntl(F_DUPFD)` and writes the
/// same message through each duplicate.
///
/// The results of the libc calls are checked so that a failed duplication or write makes the
/// test fail instead of passing silently.
fn test_dup_stdout_stderr() {
    let bytes = b"hello dup fd\n";
    // SAFETY: the pointer and length passed to `write` describe the live `bytes` array, and
    // `fcntl` is only handed plain integer arguments.
    unsafe {
        let new_stdout = libc::fcntl(1, libc::F_DUPFD, 0);
        assert!(new_stdout >= 0, "duplicating stdout failed");
        let new_stderr = libc::fcntl(2, libc::F_DUPFD, 0);
        assert!(new_stderr >= 0, "duplicating stderr failed");

        let written = libc::write(new_stdout, bytes.as_ptr() as *const libc::c_void, bytes.len());
        assert_eq!(written, bytes.len() as isize);
        let written = libc::write(new_stderr, bytes.as_ptr() as *const libc::c_void, bytes.len());
        assert_eq!(written, bytes.len() as isize);
    }
}
File renamed without changes.
File renamed without changes.
20 changes: 0 additions & 20 deletions tests/run-pass/fs_libc.rs

This file was deleted.

0 comments on commit 60c1075

Please sign in to comment.